From 16684f2affaf901b42a12e50f9c29e7c034ad7ea Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Wed, 12 Feb 2020 06:36:50 -0600 Subject: [PATCH 001/388] BUG: Avoid casting Int to object in Categorical.from_codes (#31794) --- doc/source/whatsnew/v1.0.2.rst | 4 ++++ pandas/core/arrays/categorical.py | 8 +++++++- .../arrays/categorical/test_constructors.py | 17 +++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index f4bb8c580fb08..8f04032e1ca09 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -28,6 +28,10 @@ Fixed regressions Bug fixes ~~~~~~~~~ +**Categorical** + +- Fixed bug where :meth:`Categorical.from_codes` improperly raised a ``ValueError`` when passed nullable integer codes. (:issue:`31779`) + **I/O** - Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d26ff7490e714..0e04354ae7c89 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -644,7 +644,13 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None): ) raise ValueError(msg) - codes = np.asarray(codes) # #21767 + if is_extension_array_dtype(codes) and is_integer_dtype(codes): + # Avoid the implicit conversion of Int to object + if isna(codes).any(): + raise ValueError("codes cannot contain NA values") + codes = codes.to_numpy(dtype=np.int64) + else: + codes = np.asarray(codes) if len(codes) and not is_integer_dtype(codes): raise ValueError("codes need to be array-like integers") diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 70e1421c8dcf4..dbd8fd8df67c1 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -560,6 +560,23 @@ def test_from_codes_neither(self): with pytest.raises(ValueError, match=msg): Categorical.from_codes([0, 1]) + def test_from_codes_with_nullable_int(self): + codes = pd.array([0, 1], dtype="Int64") + categories = ["a", "b"] + + result = Categorical.from_codes(codes, categories=categories) + expected = Categorical.from_codes(codes.to_numpy(int), categories=categories) + + tm.assert_categorical_equal(result, expected) + + def test_from_codes_with_nullable_int_na_raises(self): + codes = pd.array([0, None], dtype="Int64") + categories = ["a", "b"] + + msg = "codes cannot contain NA values" + with pytest.raises(ValueError, match=msg): + Categorical.from_codes(codes, categories=categories) + @pytest.mark.parametrize("dtype", [None, "category"]) def test_from_inferred_categories(self, dtype): cats = ["a", "b"] From 361a938dd82fdb5fdc1f7b1fff97de39326421e7 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 12 Feb 2020 13:38:26 +0100 Subject: [PATCH 002/388] BUG: fix StringArray/PandasArray setitem with slice (#31773) --- doc/source/whatsnew/v1.0.2.rst | 4 ++++ pandas/core/arrays/numpy_.py | 4 ---- pandas/tests/extension/base/setitem.py | 23 +++++++++++++++++++++++ pandas/tests/extension/test_numpy.py | 8 ++++++++ 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 8f04032e1ca09..b31f7c1166dc0 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -36,6 +36,10 @@ 
Bug fixes - Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`) +**Experimental dtypes** + +- Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`) + .. --------------------------------------------------------------------------- .. _whatsnew_102.contributors: diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index e573fe661106e..0e64967ce93a6 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -263,12 +263,8 @@ def __setitem__(self, key, value) -> None: value = extract_array(value, extract_numpy=True) key = check_array_indexer(self, key) - scalar_key = lib.is_scalar(key) scalar_value = lib.is_scalar(value) - if not scalar_key and scalar_value: - key = np.asarray(key) - if not scalar_value: value = np.asarray(value, dtype=self._ndarray.dtype) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index e0ca603aaa0ed..590bcd586900a 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -173,6 +173,29 @@ def test_setitem_tuple_index(self, data): s[(0, 1)] = data[1] self.assert_series_equal(s, expected) + def test_setitem_slice(self, data, box_in_series): + arr = data[:5].copy() + expected = data.take([0, 0, 0, 3, 4]) + if box_in_series: + arr = pd.Series(arr) + expected = pd.Series(expected) + + arr[:3] = data[0] + self.assert_equal(arr, expected) + + def test_setitem_loc_iloc_slice(self, data): + arr = data[:5].copy() + s = pd.Series(arr, index=["a", "b", "c", "d", "e"]) + expected = pd.Series(data.take([0, 0, 0, 3, 4]), index=s.index) + + result = s.copy() + result.iloc[:3] = data[0] + self.assert_equal(result, expected) + + result = s.copy() + result.loc[:"c"] = data[0] + self.assert_equal(result, expected) + def test_setitem_slice_mismatch_length_raises(self, data): arr = data[:5] with pytest.raises(ValueError): diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 8a820c8746857..76573242a2506 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -396,6 +396,14 @@ def test_setitem_scalar_key_sequence_raise(self, data): # Failed: DID NOT RAISE super().test_setitem_scalar_key_sequence_raise(data) + @skip_nested + def test_setitem_slice(self, data, box_in_series): + super().test_setitem_slice(data, box_in_series) + + @skip_nested + def test_setitem_loc_iloc_slice(self, data): + super().test_setitem_loc_iloc_slice(data) + @skip_nested class TestParsing(BaseNumPyTests, base.BaseParsingTests): From 9767da6a2215d8902706acd64cd1e2ee5d6b9e67 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 12 Feb 2020 16:04:31 +0100 Subject: [PATCH 003/388] BUG: fix parquet roundtrip with unsigned integer dtypes (#31918) --- doc/source/whatsnew/v1.0.2.rst | 2 ++ pandas/core/arrays/integer.py | 4 ++++ pandas/tests/arrays/test_integer.py | 17 +++++++++++++++-- pandas/tests/io/test_parquet.py | 13 ++++++++----- 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index b31f7c1166dc0..d7c4f29ea2762 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -35,6 +35,8 @@ Bug fixes **I/O** - Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`) +- Fixed bug in parquet roundtrip with nullable unsigned integer dtypes 
(:issue:`31896`). + **Experimental dtypes** diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 19ab43fc1c248..e0b6947394cc4 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -103,6 +103,10 @@ def __from_arrow__( import pyarrow # noqa: F811 from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask + pyarrow_type = pyarrow.from_numpy_dtype(self.type) + if not array.type.equals(pyarrow_type): + array = array.cast(pyarrow_type) + if isinstance(array, pyarrow.Array): chunks = [array] else: diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 9f0e6407c25f0..9186c33c12c06 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -1036,9 +1036,9 @@ def test_arrow_array(data): assert arr.equals(expected) -@td.skip_if_no("pyarrow", min_version="0.15.1.dev") +@td.skip_if_no("pyarrow", min_version="0.16.0") def test_arrow_roundtrip(data): - # roundtrip possible from arrow 1.0.0 + # roundtrip possible from arrow 0.16.0 import pyarrow as pa df = pd.DataFrame({"a": data}) @@ -1048,6 +1048,19 @@ def test_arrow_roundtrip(data): tm.assert_frame_equal(result, df) +@td.skip_if_no("pyarrow", min_version="0.16.0") +def test_arrow_from_arrow_uint(): + # https://github.com/pandas-dev/pandas/issues/31896 + # possible mismatch in types + import pyarrow as pa + + dtype = pd.UInt32Dtype() + result = dtype.__from_arrow__(pa.array([1, 2, 3, 4, None], type="int64")) + expected = pd.array([1, 2, 3, 4, None], dtype="UInt32") + + tm.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize( "pandasmethname, kwargs", [ diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 7ed8d8f22764c..f76db9939641c 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -533,25 +533,28 @@ def test_additional_extension_arrays(self, pa): df = pd.DataFrame( { "a": pd.Series([1, 2, 3], dtype="Int64"), - "b": pd.Series(["a", None, "c"], dtype="string"), + "b": pd.Series([1, 2, 3], dtype="UInt32"), + "c": pd.Series(["a", None, "c"], dtype="string"), } ) - if LooseVersion(pyarrow.__version__) >= LooseVersion("0.15.1.dev"): + if LooseVersion(pyarrow.__version__) >= LooseVersion("0.16.0"): expected = df else: # de-serialized as plain int / object - expected = df.assign(a=df.a.astype("int64"), b=df.b.astype("object")) + expected = df.assign( + a=df.a.astype("int64"), b=df.b.astype("int64"), c=df.c.astype("object") + ) check_round_trip(df, pa, expected=expected) df = pd.DataFrame({"a": pd.Series([1, 2, 3, None], dtype="Int64")}) - if LooseVersion(pyarrow.__version__) >= LooseVersion("0.15.1.dev"): + if LooseVersion(pyarrow.__version__) >= LooseVersion("0.16.0"): expected = df else: # if missing values in integer, currently de-serialized as float expected = df.assign(a=df.a.astype("float64")) check_round_trip(df, pa, expected=expected) - @td.skip_if_no("pyarrow", min_version="0.15.1.dev") + @td.skip_if_no("pyarrow", min_version="0.16.0") def test_additional_extension_types(self, pa): # test additional ExtensionArrays that are supported through the # __arrow_array__ protocol + by defining a custom ExtensionType From 2154ad3b891de39b83089e0e52fc265b8c0c97ef Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 12 Feb 2020 16:05:28 +0100 Subject: [PATCH 004/388] BUG: fix infer_dtype for StringDtype (#31877) --- doc/source/whatsnew/v1.0.2.rst | 2 ++ pandas/_libs/lib.pyx | 2 +- pandas/conftest.py | 2 ++ 
pandas/tests/dtypes/test_inference.py | 18 ++++++++++++++++++ .../series/methods/test_convert_dtypes.py | 9 +++++++++ pandas/tests/test_strings.py | 4 ++++ 6 files changed, 36 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index d7c4f29ea2762..0216007ea5ba8 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -38,8 +38,10 @@ Bug fixes - Fixed bug in parquet roundtrip with nullable unsigned integer dtypes (:issue:`31896`). + **Experimental dtypes** +- Fix bug in :meth:`DataFrame.convert_dtypes` for columns that were already using the ``"string"`` dtype (:issue:`31731`). - Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`) .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 9702eb4615909..d2f0b2ffbaeec 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1005,7 +1005,7 @@ _TYPE_MAP = { 'complex64': 'complex', 'complex128': 'complex', 'c': 'complex', - 'string': 'bytes', + 'string': 'string', 'S': 'bytes', 'U': 'string', 'bool': 'boolean', diff --git a/pandas/conftest.py b/pandas/conftest.py index 7463b2b579c0c..821bec19d6115 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -744,6 +744,7 @@ def any_numpy_dtype(request): # categoricals are handled separately _any_skipna_inferred_dtype = [ ("string", ["a", np.nan, "c"]), + ("string", ["a", pd.NA, "c"]), ("bytes", [b"a", np.nan, b"c"]), ("empty", [np.nan, np.nan, np.nan]), ("empty", []), @@ -754,6 +755,7 @@ def any_numpy_dtype(request): ("mixed-integer-float", [1, np.nan, 2.0]), ("decimal", [Decimal(1), np.nan, Decimal(2)]), ("boolean", [True, np.nan, False]), + ("boolean", [True, pd.NA, False]), ("datetime64", [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")]), ("datetime", [pd.Timestamp("20130101"), np.nan, pd.Timestamp("20180101")]), ("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]), diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 48f9262ad3486..48ae1f67297af 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1200,6 +1200,24 @@ def test_interval(self): inferred = lib.infer_dtype(pd.Series(idx), skipna=False) assert inferred == "interval" + @pytest.mark.parametrize("klass", [pd.array, pd.Series]) + @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("data", [["a", "b", "c"], ["a", "b", pd.NA]]) + def test_string_dtype(self, data, skipna, klass): + # StringArray + val = klass(data, dtype="string") + inferred = lib.infer_dtype(val, skipna=skipna) + assert inferred == "string" + + @pytest.mark.parametrize("klass", [pd.array, pd.Series]) + @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("data", [[True, False, True], [True, False, pd.NA]]) + def test_boolean_dtype(self, data, skipna, klass): + # BooleanArray + val = klass(data, dtype="boolean") + inferred = lib.infer_dtype(val, skipna=skipna) + assert inferred == "boolean" + class TestNumberScalar: def test_is_number(self): diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 923b5a94c5f41..a6b5fed40a9d7 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -246,3 +246,12 @@ def test_convert_dtypes(self, data, maindtype, params, answerdict): # Make sure 
original not changed tm.assert_series_equal(series, copy) + + def test_convert_string_dtype(self): + # https://github.com/pandas-dev/pandas/issues/31731 -> converting columns + # that are already string dtype + df = pd.DataFrame( + {"A": ["a", "b", pd.NA], "B": ["ä", "ö", "ü"]}, dtype="string" + ) + result = df.convert_dtypes() + tm.assert_frame_equal(df, result) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 62d26dacde67b..1338d801e39f4 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -7,6 +7,7 @@ from pandas._libs import lib +import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series, concat, isna, notna import pandas._testing as tm import pandas.core.strings as strings @@ -207,6 +208,9 @@ def test_api_per_dtype(self, index_or_series, dtype, any_skipna_inferred_dtype): box = index_or_series inferred_dtype, values = any_skipna_inferred_dtype + if dtype == "category" and len(values) and values[1] is pd.NA: + pytest.xfail(reason="Categorical does not yet support pd.NA") + t = box(values, dtype=dtype) # explicit dtype to avoid casting # TODO: get rid of these xfails From dd03c196d684d6ed594ee58451b0a36d7935a1ea Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 12 Feb 2020 15:58:56 +0000 Subject: [PATCH 005/388] TYP: pandas/core/dtypes/base.py (#31352) --- pandas/core/dtypes/base.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 618a35886a905..a4f0ccc2016c0 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -2,7 +2,7 @@ Extend pandas with custom array types. """ -from typing import Any, List, Optional, Tuple, Type +from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Type import numpy as np @@ -10,6 +10,9 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries +if TYPE_CHECKING: + from pandas.core.arrays import ExtensionArray # noqa: F401 + class ExtensionDtype: """ @@ -29,7 +32,6 @@ class ExtensionDtype: * type * name - * construct_from_string The following attributes influence the behavior of the dtype in pandas operations @@ -74,7 +76,7 @@ class property**. class ExtensionDtype: def __from_arrow__( - self, array: pyarrow.Array/ChunkedArray + self, array: Union[pyarrow.Array, pyarrow.ChunkedArray] ) -> ExtensionArray: ... @@ -122,11 +124,11 @@ def __eq__(self, other: Any) -> bool: def __hash__(self) -> int: return hash(tuple(getattr(self, attr) for attr in self._metadata)) - def __ne__(self, other) -> bool: + def __ne__(self, other: Any) -> bool: return not self.__eq__(other) @property - def na_value(self): + def na_value(self) -> object: """ Default NA value to use for this type. @@ -184,7 +186,7 @@ def names(self) -> Optional[List[str]]: return None @classmethod - def construct_array_type(cls): + def construct_array_type(cls) -> Type["ExtensionArray"]: """ Return the array type associated with this dtype. @@ -250,7 +252,7 @@ def construct_from_string(cls, string: str): return cls() @classmethod - def is_dtype(cls, dtype) -> bool: + def is_dtype(cls, dtype: object) -> bool: """ Check if we match 'dtype'. @@ -261,7 +263,7 @@ def is_dtype(cls, dtype) -> bool: Returns ------- - is_dtype : bool + bool Notes ----- From 012a6a3912e13f3446363d8090ea02c81e99c7a1 Mon Sep 17 00:00:00 2001 From: "Uwe L. 
Korn" Date: Wed, 12 Feb 2020 17:00:41 +0100 Subject: [PATCH 006/388] BLD: Run flake8 check on Cython files in pre-commit (#30847) --- .pre-commit-config.yaml | 14 ++++++++++++++ ci/code_checks.sh | 8 ++++---- flake8/cython-template.cfg | 4 ++++ flake8/cython.cfg | 3 +++ pandas/_libs/sparse_op_helper.pxi.in | 2 +- pandas/_libs/tslibs/util.pxd | 2 +- web/pandas_web.py | 5 +++-- 7 files changed, 30 insertions(+), 8 deletions(-) create mode 100644 flake8/cython-template.cfg create mode 100644 flake8/cython.cfg diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 139b9e31df46c..896765722bf32 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,6 +10,20 @@ repos: - id: flake8 language: python_venv additional_dependencies: [flake8-comprehensions>=3.1.0] + - id: flake8 + name: flake8-pyx + language: python_venv + files: \.(pyx|pxd)$ + types: + - file + args: [--append-config=flake8/cython.cfg] + - id: flake8 + name: flake8-pxd + language: python_venv + files: \.pxi\.in$ + types: + - file + args: [--append-config=flake8/cython-template.cfg] - repo: https://github.com/pre-commit/mirrors-isort rev: v4.3.21 hooks: diff --git a/ci/code_checks.sh b/ci/code_checks.sh index fdc9fef5d7f77..7eb80077c4fab 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -65,12 +65,12 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then flake8 --format="$FLAKE8_FORMAT" . RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Linting .pyx code' ; echo $MSG - flake8 --format="$FLAKE8_FORMAT" pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411 + MSG='Linting .pyx and .pxd code' ; echo $MSG + flake8 --format="$FLAKE8_FORMAT" pandas --append-config=flake8/cython.cfg RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Linting .pxd and .pxi.in' ; echo $MSG - flake8 --format="$FLAKE8_FORMAT" pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 + MSG='Linting .pxi.in' ; echo $MSG + flake8 --format="$FLAKE8_FORMAT" pandas/_libs --append-config=flake8/cython-template.cfg RET=$(($RET + $?)) ; echo $MSG "DONE" echo "flake8-rst --version" diff --git a/flake8/cython-template.cfg b/flake8/cython-template.cfg new file mode 100644 index 0000000000000..61562bd7701b1 --- /dev/null +++ b/flake8/cython-template.cfg @@ -0,0 +1,4 @@ +[flake8] +filename = *.pxi.in +select = E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 + diff --git a/flake8/cython.cfg b/flake8/cython.cfg new file mode 100644 index 0000000000000..2dfe47b60b4c1 --- /dev/null +++ b/flake8/cython.cfg @@ -0,0 +1,3 @@ +[flake8] +filename = *.pyx,*.pxd +select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411 diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index 996da4ca2f92b..ce665ca812131 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -235,7 +235,7 @@ cdef inline tuple int_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_, {{dtype}}_t yfill): cdef: IntIndex out_index - Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices int32_t xloc, yloc int32_t[:] xindices, yindices, out_indices {{dtype}}_t[:] x, y diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index 
936532a81c6d6..e7f6b3334eb65 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -42,7 +42,7 @@ cdef extern from "numpy/ndarrayobject.h": bint PyArray_IsIntegerScalar(obj) nogil bint PyArray_Check(obj) nogil -cdef extern from "numpy/npy_common.h": +cdef extern from "numpy/npy_common.h": int64_t NPY_MIN_INT64 diff --git a/web/pandas_web.py b/web/pandas_web.py index a34a31feabce0..38ab78f5690e7 100755 --- a/web/pandas_web.py +++ b/web/pandas_web.py @@ -34,12 +34,13 @@ import time import typing -import feedparser import jinja2 -import markdown import requests import yaml +import feedparser +import markdown + class Preprocessors: """ From 143b0118ca75c2eed3663e6c480cb07afb9fe578 Mon Sep 17 00:00:00 2001 From: HH-MWB <50187675+HH-MWB@users.noreply.github.com> Date: Wed, 12 Feb 2020 11:01:45 -0500 Subject: [PATCH 007/388] CLN: Replace Appender and Substitution with simpler doc decorator (#31060) --- .../development/contributing_docstring.rst | 27 ++- pandas/core/accessor.py | 161 +++++++++--------- pandas/core/algorithms.py | 63 ++++--- pandas/core/base.py | 6 +- pandas/core/frame.py | 4 +- pandas/core/generic.py | 16 +- pandas/core/series.py | 6 +- pandas/tests/util/test_doc.py | 88 ++++++++++ pandas/util/_decorators.py | 40 +++++ 9 files changed, 266 insertions(+), 145 deletions(-) create mode 100644 pandas/tests/util/test_doc.py diff --git a/doc/source/development/contributing_docstring.rst b/doc/source/development/contributing_docstring.rst index 649dd37b497b2..1c99b341f6c5a 100644 --- a/doc/source/development/contributing_docstring.rst +++ b/doc/source/development/contributing_docstring.rst @@ -937,33 +937,31 @@ classes. This helps us keep docstrings consistent, while keeping things clear for the user reading. It comes at the cost of some complexity when writing. Each shared docstring will have a base template with variables, like -``%(klass)s``. The variables filled in later on using the ``Substitution`` -decorator. Finally, docstrings can be appended to with the ``Appender`` -decorator. +``{klass}``. The variables filled in later on using the ``doc`` decorator. +Finally, docstrings can also be appended to with the ``doc`` decorator. In this example, we'll create a parent docstring normally (this is like ``pandas.core.generic.NDFrame``. Then we'll have two children (like ``pandas.core.series.Series`` and ``pandas.core.frame.DataFrame``). We'll -substitute the children's class names in this docstring. +substitute the class names in this docstring. .. code-block:: python class Parent: + @doc(klass="Parent") def my_function(self): - """Apply my function to %(klass)s.""" + """Apply my function to {klass}.""" ... class ChildA(Parent): - @Substitution(klass="ChildA") - @Appender(Parent.my_function.__doc__) + @doc(Parent.my_function, klass="ChildA") def my_function(self): ... class ChildB(Parent): - @Substitution(klass="ChildB") - @Appender(Parent.my_function.__doc__) + @doc(Parent.my_function, klass="ChildB") def my_function(self): ... @@ -972,18 +970,16 @@ The resulting docstrings are .. code-block:: python >>> print(Parent.my_function.__doc__) - Apply my function to %(klass)s. + Apply my function to Parent. >>> print(ChildA.my_function.__doc__) Apply my function to ChildA. >>> print(ChildB.my_function.__doc__) Apply my function to ChildB. -Notice two things: +Notice: 1. We "append" the parent docstring to the children docstrings, which are initially empty. -2. Python decorators are applied inside out. 
So the order is Append then - Substitution, even though Substitution comes first in the file. Our files will often contain a module-level ``_shared_doc_kwargs`` with some common substitution values (things like ``klass``, ``axes``, etc). @@ -992,14 +988,13 @@ You can substitute and append in one shot with something like .. code-block:: python - @Appender(template % _shared_doc_kwargs) + @doc(template, **_shared_doc_kwargs) def my_function(self): ... where ``template`` may come from a module-level ``_shared_docs`` dictionary mapping function names to docstrings. Wherever possible, we prefer using -``Appender`` and ``Substitution``, since the docstring-writing processes is -slightly closer to normal. +``doc``, since the docstring-writing processes is slightly closer to normal. See ``pandas.core.generic.NDFrame.fillna`` for an example template, and ``pandas.core.series.Series.fillna`` and ``pandas.core.generic.frame.fillna`` diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index a04e9c3e68310..4e3ef0c52bbdd 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -7,7 +7,7 @@ from typing import FrozenSet, Set import warnings -from pandas.util._decorators import Appender +from pandas.util._decorators import doc class DirNamesMixin: @@ -193,98 +193,97 @@ def __get__(self, obj, cls): return accessor_obj +@doc(klass="", others="") def _register_accessor(name, cls): - def decorator(accessor): - if hasattr(cls, name): - warnings.warn( - f"registration of accessor {repr(accessor)} under name " - f"{repr(name)} for type {repr(cls)} is overriding a preexisting" - f"attribute with the same name.", - UserWarning, - stacklevel=2, - ) - setattr(cls, name, CachedAccessor(name, accessor)) - cls._accessors.add(name) - return accessor - - return decorator + """ + Register a custom accessor on {klass} objects. + Parameters + ---------- + name : str + Name under which the accessor should be registered. A warning is issued + if this name conflicts with a preexisting attribute. -_doc = """ -Register a custom accessor on %(klass)s objects. + Returns + ------- + callable + A class decorator. -Parameters ----------- -name : str - Name under which the accessor should be registered. A warning is issued - if this name conflicts with a preexisting attribute. + See Also + -------- + {others} -Returns -------- -callable - A class decorator. + Notes + ----- + When accessed, your accessor will be initialized with the pandas object + the user is interacting with. So the signature must be -See Also --------- -%(others)s + .. code-block:: python -Notes ------ -When accessed, your accessor will be initialized with the pandas object -the user is interacting with. So the signature must be + def __init__(self, pandas_object): # noqa: E999 + ... -.. code-block:: python + For consistency with pandas methods, you should raise an ``AttributeError`` + if the data passed to your accessor has an incorrect dtype. - def __init__(self, pandas_object): # noqa: E999 - ... + >>> pd.Series(['a', 'b']).dt + Traceback (most recent call last): + ... + AttributeError: Can only use .dt accessor with datetimelike values -For consistency with pandas methods, you should raise an ``AttributeError`` -if the data passed to your accessor has an incorrect dtype. + Examples + -------- ->>> pd.Series(['a', 'b']).dt -Traceback (most recent call last): -... 
-AttributeError: Can only use .dt accessor with datetimelike values + In your library code:: -Examples --------- + import pandas as pd -In your library code:: + @pd.api.extensions.register_dataframe_accessor("geo") + class GeoAccessor: + def __init__(self, pandas_obj): + self._obj = pandas_obj - import pandas as pd + @property + def center(self): + # return the geographic center point of this DataFrame + lat = self._obj.latitude + lon = self._obj.longitude + return (float(lon.mean()), float(lat.mean())) - @pd.api.extensions.register_dataframe_accessor("geo") - class GeoAccessor: - def __init__(self, pandas_obj): - self._obj = pandas_obj + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass - @property - def center(self): - # return the geographic center point of this DataFrame - lat = self._obj.latitude - lon = self._obj.longitude - return (float(lon.mean()), float(lat.mean())) + Back in an interactive IPython session: - def plot(self): - # plot this array's data on a map, e.g., using Cartopy - pass + >>> ds = pd.DataFrame({{'longitude': np.linspace(0, 10), + ... 'latitude': np.linspace(0, 20)}}) + >>> ds.geo.center + (5.0, 10.0) + >>> ds.geo.plot() + # plots data on a map + """ -Back in an interactive IPython session: + def decorator(accessor): + if hasattr(cls, name): + warnings.warn( + f"registration of accessor {repr(accessor)} under name " + f"{repr(name)} for type {repr(cls)} is overriding a preexisting" + f"attribute with the same name.", + UserWarning, + stacklevel=2, + ) + setattr(cls, name, CachedAccessor(name, accessor)) + cls._accessors.add(name) + return accessor - >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), - ... 'latitude': np.linspace(0, 20)}) - >>> ds.geo.center - (5.0, 10.0) - >>> ds.geo.plot() - # plots data on a map -""" + return decorator -@Appender( - _doc - % dict( - klass="DataFrame", others=("register_series_accessor, register_index_accessor") - ) +@doc( + _register_accessor, + klass="DataFrame", + others="register_series_accessor, register_index_accessor", ) def register_dataframe_accessor(name): from pandas import DataFrame @@ -292,11 +291,10 @@ def register_dataframe_accessor(name): return _register_accessor(name, DataFrame) -@Appender( - _doc - % dict( - klass="Series", others=("register_dataframe_accessor, register_index_accessor") - ) +@doc( + _register_accessor, + klass="Series", + others="register_dataframe_accessor, register_index_accessor", ) def register_series_accessor(name): from pandas import Series @@ -304,11 +302,10 @@ def register_series_accessor(name): return _register_accessor(name, Series) -@Appender( - _doc - % dict( - klass="Index", others=("register_dataframe_accessor, register_series_accessor") - ) +@doc( + _register_accessor, + klass="Index", + others="register_dataframe_accessor, register_series_accessor", ) def register_index_accessor(name): from pandas import Index diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 886b0a3c5fec1..c915895a8fc4a 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -11,7 +11,7 @@ from pandas._libs import Timestamp, algos, hashtable as htable, lib from pandas._libs.tslib import iNaT -from pandas.util._decorators import Appender, Substitution +from pandas.util._decorators import doc from pandas.core.dtypes.cast import ( construct_1d_object_array_from_listlike, @@ -487,9 +487,32 @@ def _factorize_array( return codes, uniques -_shared_docs[ - "factorize" -] = """ +@doc( + values=dedent( + """\ + values : sequence + A 1-D 
sequence. Sequences that aren't pandas objects are + coerced to ndarrays before factorization. + """ + ), + sort=dedent( + """\ + sort : bool, default False + Sort `uniques` and shuffle `codes` to maintain the + relationship. + """ + ), + size_hint=dedent( + """\ + size_hint : int, optional + Hint to the hashtable sizer. + """ + ), +) +def factorize( + values, sort: bool = False, na_sentinel: int = -1, size_hint: Optional[int] = None +) -> Tuple[np.ndarray, Union[np.ndarray, ABCIndex]]: + """ Encode the object as an enumerated type or categorical variable. This method is useful for obtaining a numeric representation of an @@ -499,10 +522,10 @@ def _factorize_array( Parameters ---------- - %(values)s%(sort)s + {values}{sort} na_sentinel : int, default -1 Value to mark "not found". - %(size_hint)s\ + {size_hint}\ Returns ------- @@ -580,34 +603,6 @@ def _factorize_array( >>> uniques Index(['a', 'c'], dtype='object') """ - - -@Substitution( - values=dedent( - """\ - values : sequence - A 1-D sequence. Sequences that aren't pandas objects are - coerced to ndarrays before factorization. - """ - ), - sort=dedent( - """\ - sort : bool, default False - Sort `uniques` and shuffle `codes` to maintain the - relationship. - """ - ), - size_hint=dedent( - """\ - size_hint : int, optional - Hint to the hashtable sizer. - """ - ), -) -@Appender(_shared_docs["factorize"]) -def factorize( - values, sort: bool = False, na_sentinel: int = -1, size_hint: Optional[int] = None -) -> Tuple[np.ndarray, Union[np.ndarray, ABCIndex]]: # Implementation notes: This method is responsible for 3 things # 1.) coercing data to array-like (ndarray, Index, extension array) # 2.) factorizing codes and uniques diff --git a/pandas/core/base.py b/pandas/core/base.py index f3c8b50e774af..56d3596f71813 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -13,7 +13,7 @@ from pandas.compat import PYPY from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError -from pandas.util._decorators import Appender, Substitution, cache_readonly +from pandas.util._decorators import Appender, Substitution, cache_readonly, doc from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import is_nested_object @@ -1386,7 +1386,8 @@ def memory_usage(self, deep=False): v += lib.memory_usage_of_objects(self.array) return v - @Substitution( + @doc( + algorithms.factorize, values="", order="", size_hint="", @@ -1398,7 +1399,6 @@ def memory_usage(self, deep=False): """ ), ) - @Appender(algorithms._shared_docs["factorize"]) def factorize(self, sort=False, na_sentinel=-1): return algorithms.factorize(self, sort=sort, na_sentinel=na_sentinel) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0fca02f110031..432547fdb2bfa 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -49,6 +49,7 @@ Appender, Substitution, deprecate_kwarg, + doc, rewrite_axis_style_signature, ) from pandas.util._validators import ( @@ -4155,8 +4156,7 @@ def rename( errors=errors, ) - @Substitution(**_shared_doc_kwargs) - @Appender(NDFrame.fillna.__doc__) + @doc(NDFrame.fillna, **_shared_doc_kwargs) def fillna( self, value=None, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index dfafb1057a543..08b48cddde929 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -45,7 +45,12 @@ from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError -from pandas.util._decorators import 
Appender, Substitution, rewrite_axis_style_signature +from pandas.util._decorators import ( + Appender, + Substitution, + doc, + rewrite_axis_style_signature, +) from pandas.util._validators import ( validate_bool_kwarg, validate_fillna_kwargs, @@ -5877,6 +5882,7 @@ def convert_dtypes( # ---------------------------------------------------------------------- # Filling NA's + @doc(**_shared_doc_kwargs) def fillna( self: FrameOrSeries, value=None, @@ -5897,11 +5903,11 @@ def fillna( each index (for a Series) or column (for a DataFrame). Values not in the dict/Series/DataFrame will not be filled. This value cannot be a list. - method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None + method : {{'backfill', 'bfill', 'pad', 'ffill', None}}, default None Method to use for filling holes in reindexed Series pad / ffill: propagate last valid observation forward to next valid backfill / bfill: use next valid observation to fill gap. - axis : %(axes_single_arg)s + axis : {axes_single_arg} Axis along which to fill missing values. inplace : bool, default False If True, fill in-place. Note: this will modify any @@ -5921,7 +5927,7 @@ def fillna( Returns ------- - %(klass)s or None + {klass} or None Object with missing values filled or None if ``inplace=True``. See Also @@ -5965,7 +5971,7 @@ def fillna( Replace all NaN elements in column 'A', 'B', 'C', and 'D', with 0, 1, 2, and 3 respectively. - >>> values = {'A': 0, 'B': 1, 'C': 2, 'D': 3} + >>> values = {{'A': 0, 'B': 1, 'C': 2, 'D': 3}} >>> df.fillna(value=values) A B C D 0 0.0 2.0 2.0 0 diff --git a/pandas/core/series.py b/pandas/core/series.py index 7d74d32bf5e14..946caaac97838 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -25,7 +25,7 @@ from pandas._libs import lib, properties, reshape, tslibs from pandas._typing import Label from pandas.compat.numpy import function as nv -from pandas.util._decorators import Appender, Substitution +from pandas.util._decorators import Appender, Substitution, doc from pandas.util._validators import validate_bool_kwarg, validate_percentile from pandas.core.dtypes.cast import convert_dtypes, validate_numeric_casting @@ -73,6 +73,7 @@ is_empty_data, sanitize_array, ) +from pandas.core.generic import NDFrame from pandas.core.indexers import maybe_convert_indices from pandas.core.indexes.accessors import CombinedDatetimelikeProperties from pandas.core.indexes.api import ( @@ -4149,8 +4150,7 @@ def drop( errors=errors, ) - @Substitution(**_shared_doc_kwargs) - @Appender(generic.NDFrame.fillna.__doc__) + @doc(NDFrame.fillna, **_shared_doc_kwargs) def fillna( self, value=None, diff --git a/pandas/tests/util/test_doc.py b/pandas/tests/util/test_doc.py new file mode 100644 index 0000000000000..7e5e24456b9a7 --- /dev/null +++ b/pandas/tests/util/test_doc.py @@ -0,0 +1,88 @@ +from textwrap import dedent + +from pandas.util._decorators import doc + + +@doc(method="cumsum", operation="sum") +def cumsum(whatever): + """ + This is the {method} method. + + It computes the cumulative {operation}. + """ + + +@doc( + cumsum, + """ + Examples + -------- + + >>> cumavg([1, 2, 3]) + 2 + """, + method="cumavg", + operation="average", +) +def cumavg(whatever): + pass + + +@doc(cumsum, method="cummax", operation="maximum") +def cummax(whatever): + pass + + +@doc(cummax, method="cummin", operation="minimum") +def cummin(whatever): + pass + + +def test_docstring_formatting(): + docstr = dedent( + """ + This is the cumsum method. + + It computes the cumulative sum. 
+ """ + ) + assert cumsum.__doc__ == docstr + + +def test_docstring_appending(): + docstr = dedent( + """ + This is the cumavg method. + + It computes the cumulative average. + + Examples + -------- + + >>> cumavg([1, 2, 3]) + 2 + """ + ) + assert cumavg.__doc__ == docstr + + +def test_doc_template_from_func(): + docstr = dedent( + """ + This is the cummax method. + + It computes the cumulative maximum. + """ + ) + assert cummax.__doc__ == docstr + + +def test_inherit_doc_template(): + docstr = dedent( + """ + This is the cummin method. + + It computes the cumulative minimum. + """ + ) + assert cummin.__doc__ == docstr diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 0aab5a9c4113d..05f73a126feca 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -247,6 +247,46 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]: return decorate +def doc(*args: Union[str, Callable], **kwargs: str) -> Callable[[F], F]: + """ + A decorator take docstring templates, concatenate them and perform string + substitution on it. + + This decorator is robust even if func.__doc__ is None. This decorator will + add a variable "_docstr_template" to the wrapped function to save original + docstring template for potential usage. + + Parameters + ---------- + *args : str or callable + The string / docstring / docstring template to be appended in order + after default docstring under function. + **kwags : str + The string which would be used to format docstring template. + """ + + def decorator(func: F) -> F: + @wraps(func) + def wrapper(*args, **kwargs) -> Callable: + return func(*args, **kwargs) + + templates = [func.__doc__ if func.__doc__ else ""] + for arg in args: + if isinstance(arg, str): + templates.append(arg) + elif hasattr(arg, "_docstr_template"): + templates.append(arg._docstr_template) # type: ignore + elif arg.__doc__: + templates.append(arg.__doc__) + + wrapper._docstr_template = "".join(dedent(t) for t in templates) # type: ignore + wrapper.__doc__ = wrapper._docstr_template.format(**kwargs) # type: ignore + + return cast(F, wrapper) + + return decorator + + # Substitution and Appender are derived from matplotlib.docstring (1.1.0) # module https://matplotlib.org/users/license.html From c2f3ce396454c8a80f695e64b87b43757797b7d3 Mon Sep 17 00:00:00 2001 From: Jean-Francois Zinque Date: Wed, 12 Feb 2020 17:04:22 +0100 Subject: [PATCH 008/388] BUG: MultiIndex intersection with sort=False does not preserve order (#31312) --- asv_bench/benchmarks/multiindex_object.py | 39 ++++++++++++++++++ doc/source/whatsnew/v1.1.0.rst | 10 +++++ pandas/core/indexes/multi.py | 20 +++++++-- pandas/tests/indexes/multi/test_setops.py | 50 +++++++++++------------ 4 files changed, 89 insertions(+), 30 deletions(-) diff --git a/asv_bench/benchmarks/multiindex_object.py b/asv_bench/benchmarks/multiindex_object.py index 0e188c58012fa..793f0c7c03c77 100644 --- a/asv_bench/benchmarks/multiindex_object.py +++ b/asv_bench/benchmarks/multiindex_object.py @@ -160,4 +160,43 @@ def time_equals_non_object_index(self): self.mi_large_slow.equals(self.idx_non_object) +class SetOperations: + + params = [ + ("monotonic", "non_monotonic"), + ("datetime", "int", "string"), + ("intersection", "union", "symmetric_difference"), + ] + param_names = ["index_structure", "dtype", "method"] + + def setup(self, index_structure, dtype, method): + N = 10 ** 5 + level1 = range(1000) + + level2 = date_range(start="1/1/2000", periods=N // 1000) + dates_left = MultiIndex.from_product([level1, level2]) + + level2 = 
range(N // 1000) + int_left = MultiIndex.from_product([level1, level2]) + + level2 = tm.makeStringIndex(N // 1000).values + str_left = MultiIndex.from_product([level1, level2]) + + data = { + "datetime": dates_left, + "int": int_left, + "string": str_left, + } + + if index_structure == "non_monotonic": + data = {k: mi[::-1] for k, mi in data.items()} + + data = {k: {"left": mi, "right": mi[:-1]} for k, mi in data.items()} + self.left = data[dtype]["left"] + self.right = data[dtype]["right"] + + def time_operation(self, index_structure, dtype, method): + getattr(self.left, method)(self.right) + + from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 381578ad13bdd..8d7f817b83946 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -176,6 +176,16 @@ MultiIndex index=[["a", "a", "b", "b"], [1, 2, 1, 2]]) # Rows are now ordered as the requested keys df.loc[(['b', 'a'], [2, 1]), :] + +- Bug in :meth:`MultiIndex.intersection` was not guaranteed to preserve order when ``sort=False``. (:issue:`31325`) + +.. ipython:: python + + left = pd.MultiIndex.from_arrays([["b", "a"], [2, 1]]) + right = pd.MultiIndex.from_arrays([["a", "b", "c"], [1, 2, 3]]) + # Common elements are now guaranteed to be ordered by the left side + left.intersection(right, sort=False) + - I/O diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ac151daac951a..8a5bb3002213d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3314,9 +3314,23 @@ def intersection(self, other, sort=False): if self.equals(other): return self - self_tuples = self._ndarray_values - other_tuples = other._ndarray_values - uniq_tuples = set(self_tuples) & set(other_tuples) + lvals = self._ndarray_values + rvals = other._ndarray_values + + uniq_tuples = None # flag whether _inner_indexer was succesful + if self.is_monotonic and other.is_monotonic: + try: + uniq_tuples = self._inner_indexer(lvals, rvals)[0] + sort = False # uniq_tuples is already sorted + except TypeError: + pass + + if uniq_tuples is None: + other_uniq = set(rvals) + seen = set() + uniq_tuples = [ + x for x in lvals if x in other_uniq and not (x in seen or seen.add(x)) + ] if sort is None: uniq_tuples = sorted(uniq_tuples) diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index f949db537de67..627127f7b5b53 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -19,22 +19,20 @@ def test_set_ops_error_cases(idx, case, sort, method): @pytest.mark.parametrize("sort", [None, False]) -def test_intersection_base(idx, sort): - first = idx[:5] - second = idx[:3] - intersect = first.intersection(second, sort=sort) +@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list]) +def test_intersection_base(idx, sort, klass): + first = idx[2::-1] # first 3 elements reversed + second = idx[:5] - if sort is None: - tm.assert_index_equal(intersect, second.sort_values()) - assert tm.equalContents(intersect, second) + if klass is not MultiIndex: + second = klass(second.values) - # GH 10149 - cases = [klass(second.values) for klass in [np.array, Series, list]] - for case in cases: - result = first.intersection(case, sort=sort) - if sort is None: - tm.assert_index_equal(result, second.sort_values()) - assert tm.equalContents(result, second) + intersect = first.intersection(second, sort=sort) + if sort is None: + expected = 
first.sort_values() + else: + expected = first + tm.assert_index_equal(intersect, expected) msg = "other must be a MultiIndex or a list of tuples" with pytest.raises(TypeError, match=msg): @@ -42,22 +40,20 @@ def test_intersection_base(idx, sort): @pytest.mark.parametrize("sort", [None, False]) -def test_union_base(idx, sort): - first = idx[3:] +@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list]) +def test_union_base(idx, sort, klass): + first = idx[::-1] second = idx[:5] - everything = idx + + if klass is not MultiIndex: + second = klass(second.values) + union = first.union(second, sort=sort) if sort is None: - tm.assert_index_equal(union, everything.sort_values()) - assert tm.equalContents(union, everything) - - # GH 10149 - cases = [klass(second.values) for klass in [np.array, Series, list]] - for case in cases: - result = first.union(case, sort=sort) - if sort is None: - tm.assert_index_equal(result, everything.sort_values()) - assert tm.equalContents(result, everything) + expected = first.sort_values() + else: + expected = first + tm.assert_index_equal(union, expected) msg = "other must be a MultiIndex or a list of tuples" with pytest.raises(TypeError, match=msg): From bc6ab050f89fe34cc31370af21c12062c7dbb10e Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Wed, 12 Feb 2020 18:09:00 +0200 Subject: [PATCH 009/388] CLN: Some code cleanups (#31792) --- pandas/core/internals/blocks.py | 19 +++++-------------- pandas/core/internals/concat.py | 9 ++++----- pandas/core/internals/managers.py | 7 ++----- pandas/io/parsers.py | 7 +++---- pandas/io/pytables.py | 7 +++---- 5 files changed, 17 insertions(+), 32 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 536aa53c95fba..736db6b963adf 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -85,8 +85,6 @@ import pandas.core.missing as missing from pandas.core.nanops import nanpercentile -from pandas.io.formats.printing import pprint_thing - class Block(PandasObject): """ @@ -159,7 +157,8 @@ def _check_ndim(self, values, ndim): @property def _holder(self): - """The array-like that can hold the underlying values. + """ + The array-like that can hold the underlying values. None for 'Block', overridden by subclasses that don't use an ndarray. @@ -284,16 +283,11 @@ def __repr__(self) -> str: # don't want to print out all of the items here name = type(self).__name__ if self._is_single_block: - result = f"{name}: {len(self)} dtype: {self.dtype}" - else: - shape = " x ".join(pprint_thing(s) for s in self.shape) - result = ( - f"{name}: {pprint_thing(self.mgr_locs.indexer)}, " - f"{shape}, dtype: {self.dtype}" - ) + shape = " x ".join(str(s) for s in self.shape) + result = f"{name}: {self.mgr_locs.indexer}, {shape}, dtype: {self.dtype}" return result @@ -319,10 +313,7 @@ def getitem_block(self, slicer, new_mgr_locs=None): As of now, only supports slices that preserve dimensionality. 
""" if new_mgr_locs is None: - if isinstance(slicer, tuple): - axis0_slicer = slicer[0] - else: - axis0_slicer = slicer + axis0_slicer = slicer[0] if isinstance(slicer, tuple) else slicer new_mgr_locs = self.mgr_locs[axis0_slicer] new_values = self._slice(slicer) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index c75373b82305c..9fd7ff073afdc 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -204,10 +204,9 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): missing_arr.fill(fill_value) return missing_arr - if not self.indexers: - if not self.block._can_consolidate: - # preserve these for validation in concat_compat - return self.block.values + if (not self.indexers) and (not self.block._can_consolidate): + # preserve these for validation in concat_compat + return self.block.values if self.block.is_bool and not self.block.is_categorical: # External code requested filling/upcasting, bool values must @@ -372,7 +371,7 @@ def _get_empty_dtype_and_na(join_units): raise AssertionError(msg) -def is_uniform_join_units(join_units): +def is_uniform_join_units(join_units) -> bool: """ Check if the join units consist of blocks of uniform type that can be concatenated using Block.concat_same_type instead of the generic diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 08ae0b02169d4..0ec471cf366fe 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -589,7 +589,7 @@ def comp(s, regex=False): ) return _compare_or_regex_search(values, s, regex) - masks = [comp(s, regex) for i, s in enumerate(src_list)] + masks = [comp(s, regex) for s in src_list] result_blocks = [] src_len = len(src_list) - 1 @@ -755,10 +755,7 @@ def copy(self, deep=True): # hit in e.g. tests.io.json.test_pandas def copy_func(ax): - if deep == "all": - return ax.copy(deep=True) - else: - return ax.view() + return ax.copy(deep=True) if deep == "all" else ax.view() new_axes = [copy_func(ax) for ax in self.axes] else: diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 5f754aa07a5e1..0173a25b0ec16 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1493,11 +1493,10 @@ def extract(r): # level, then our header was too long. for n in range(len(columns[0])): if all(ensure_str(col[n]) in self.unnamed_cols for col in columns): + header = ",".join(str(x) for x in self.header) raise ParserError( - "Passed header=[{header}] are too many rows for this " - "multi_index of columns".format( - header=",".join(str(x) for x in self.header) - ) + f"Passed header=[{header}] are too many rows " + "for this multi_index of columns" ) # Clean the column names (if we have an index_col). 
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 570d1f9a89159..ccd5814287173 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3095,9 +3095,8 @@ def write(self, obj, **kwargs): self.attrs.ndim = data.ndim for i, ax in enumerate(data.axes): - if i == 0: - if not ax.is_unique: - raise ValueError("Columns index has to be unique for fixed format") + if i == 0 and (not ax.is_unique): + raise ValueError("Columns index has to be unique for fixed format") self.write_index(f"axis{i}", ax) # Supporting mixed-type DataFrame objects...nontrivial @@ -4230,7 +4229,7 @@ def write_data(self, chunksize: Optional[int], dropna: bool = False): chunksize = 100000 rows = np.empty(min(chunksize, nrows), dtype=self.dtype) - chunks = int(nrows / chunksize) + 1 + chunks = nrows // chunksize + 1 for i in range(chunks): start_i = i * chunksize end_i = min((i + 1) * chunksize, nrows) From 97054acbf608bb4b22ad49346e9f9a38e0e08c8f Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 12 Feb 2020 16:10:24 +0000 Subject: [PATCH 010/388] D202 No blank lines allowed after function docstring (#31895) --- pandas/_config/config.py | 10 ------- pandas/_config/localization.py | 1 - pandas/_testing.py | 6 ---- pandas/compat/numpy/function.py | 5 ---- pandas/compat/pickle_compat.py | 1 - pandas/conftest.py | 2 -- pandas/core/algorithms.py | 4 --- pandas/core/apply.py | 5 ---- pandas/core/arrays/categorical.py | 6 ---- pandas/core/arrays/datetimelike.py | 2 -- pandas/core/arrays/integer.py | 2 -- pandas/core/arrays/period.py | 1 - pandas/core/arrays/sparse/array.py | 1 - pandas/core/arrays/sparse/scipy_sparse.py | 2 -- pandas/core/common.py | 2 -- pandas/core/computation/expr.py | 1 - pandas/core/computation/pytables.py | 3 -- pandas/core/dtypes/cast.py | 10 ------- pandas/core/dtypes/common.py | 30 ------------------- pandas/core/dtypes/concat.py | 5 ---- pandas/core/dtypes/dtypes.py | 3 -- pandas/core/dtypes/inference.py | 7 ----- pandas/core/dtypes/missing.py | 2 -- pandas/core/frame.py | 2 -- pandas/core/generic.py | 18 ----------- pandas/core/groupby/categorical.py | 2 -- pandas/core/groupby/generic.py | 3 -- pandas/core/groupby/groupby.py | 7 ----- pandas/core/groupby/grouper.py | 1 - pandas/core/groupby/ops.py | 2 -- pandas/core/indexes/base.py | 13 -------- pandas/core/indexes/category.py | 1 - pandas/core/indexes/datetimelike.py | 2 -- pandas/core/indexes/datetimes.py | 1 - pandas/core/indexes/extension.py | 1 - pandas/core/indexes/interval.py | 1 - pandas/core/indexes/multi.py | 12 -------- pandas/core/internals/blocks.py | 19 ------------ pandas/core/internals/concat.py | 1 - pandas/core/internals/construction.py | 3 -- pandas/core/internals/managers.py | 6 ---- pandas/core/nanops.py | 4 --- pandas/core/ops/__init__.py | 2 -- pandas/core/ops/array_ops.py | 3 -- pandas/core/ops/common.py | 1 - pandas/core/resample.py | 6 ---- pandas/core/reshape/merge.py | 1 - pandas/core/reshape/pivot.py | 1 - pandas/core/reshape/tile.py | 2 -- pandas/core/series.py | 5 ---- pandas/core/sorting.py | 1 - pandas/core/strings.py | 4 --- pandas/core/tools/timedeltas.py | 2 -- pandas/core/util/hashing.py | 2 -- pandas/core/window/numba_.py | 1 - pandas/core/window/rolling.py | 6 ---- pandas/io/clipboard/__init__.py | 1 - pandas/io/common.py | 1 - pandas/io/excel/_openpyxl.py | 10 ------- pandas/io/excel/_xlrd.py | 1 - pandas/io/excel/_xlsxwriter.py | 2 -- pandas/io/formats/format.py | 13 +------- pandas/io/formats/latex.py | 1 - pandas/io/formats/style.py | 1 - pandas/io/html.py | 2 -- 
pandas/io/json/_json.py | 5 ---- pandas/io/json/_normalize.py | 1 - pandas/io/orc.py | 1 - pandas/io/parquet.py | 2 -- pandas/io/parsers.py | 8 ----- pandas/io/pytables.py | 24 --------------- pandas/io/sas/sas_xport.py | 2 -- pandas/io/sql.py | 6 ---- pandas/io/stata.py | 2 -- pandas/plotting/_matplotlib/converter.py | 2 -- pandas/plotting/_matplotlib/core.py | 3 -- pandas/tests/dtypes/test_common.py | 1 - pandas/tests/frame/test_analytics.py | 3 -- pandas/tests/generic/test_generic.py | 1 - pandas/tests/groupby/test_transform.py | 1 - .../tests/indexes/datetimes/test_timezones.py | 1 - pandas/tests/indexes/ranges/test_range.py | 1 - pandas/tests/indexes/ranges/test_setops.py | 1 - pandas/tests/indexing/common.py | 3 -- pandas/tests/indexing/test_floats.py | 1 - .../tests/io/generate_legacy_storage_files.py | 1 - pandas/tests/io/json/test_ujson.py | 1 - pandas/tests/io/test_clipboard.py | 1 - pandas/tests/io/test_parquet.py | 1 - pandas/tests/plotting/common.py | 2 -- pandas/tests/resample/test_time_grouper.py | 1 - pandas/tests/reshape/merge/test_merge_asof.py | 2 -- .../merge/test_merge_index_as_string.py | 1 - .../window/moments/test_moments_rolling.py | 1 - pandas/tseries/offsets.py | 1 - pandas/util/_decorators.py | 2 -- 96 files changed, 1 insertion(+), 361 deletions(-) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 8b6116d3abd60..c283baeb9d412 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -550,7 +550,6 @@ def _select_options(pat: str) -> List[str]: if pat=="all", returns all registered options """ - # short-circuit for exact key if pat in _registered_options: return [pat] @@ -573,7 +572,6 @@ def _get_root(key: str) -> Tuple[Dict[str, Any], str]: def _is_deprecated(key: str) -> bool: """ Returns True if the given option has been deprecated """ - key = key.lower() return key in _deprecated_options @@ -586,7 +584,6 @@ def _get_deprecated_option(key: str): ------- DeprecatedOption (namedtuple) if key is deprecated, None otherwise """ - try: d = _deprecated_options[key] except KeyError: @@ -611,7 +608,6 @@ def _translate_key(key: str) -> str: if key id deprecated and a replacement key defined, will return the replacement key, otherwise returns `key` as - is """ - d = _get_deprecated_option(key) if d: return d.rkey or key @@ -627,7 +623,6 @@ def _warn_if_deprecated(key: str) -> bool: ------- bool - True if `key` is deprecated, False otherwise. """ - d = _get_deprecated_option(key) if d: if d.msg: @@ -649,7 +644,6 @@ def _warn_if_deprecated(key: str) -> bool: def _build_option_description(k: str) -> str: """ Builds a formatted description of a registered option and prints it """ - o = _get_registered_option(k) d = _get_deprecated_option(k) @@ -674,7 +668,6 @@ def _build_option_description(k: str) -> str: def pp_options_list(keys: Iterable[str], width=80, _print: bool = False): """ Builds a concise listing of available options, grouped by prefix """ - from textwrap import wrap from itertools import groupby @@ -738,7 +731,6 @@ def config_prefix(prefix): will register options "display.font.color", "display.font.size", set the value of "display.font.size"... and so on. 
""" - # Note: reset_option relies on set_option, and on key directly # it does not fit in to this monkey-patching scheme @@ -801,7 +793,6 @@ def is_instance_factory(_type) -> Callable[[Any], None]: ValueError if x is not an instance of `_type` """ - if isinstance(_type, (tuple, list)): _type = tuple(_type) type_repr = "|".join(map(str, _type)) @@ -848,7 +839,6 @@ def is_nonnegative_int(value: Optional[int]) -> None: ValueError When the value is not None or is a negative integer """ - if value is None: return diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py index 0d68e78372d8a..66865e1afb952 100644 --- a/pandas/_config/localization.py +++ b/pandas/_config/localization.py @@ -61,7 +61,6 @@ def can_set_locale(lc: str, lc_var: int = locale.LC_ALL) -> bool: bool Whether the passed locale can be set """ - try: with set_locale(lc, lc_var=lc_var): pass diff --git a/pandas/_testing.py b/pandas/_testing.py index 9e71524263a18..46ed65c87e8dd 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -1508,7 +1508,6 @@ def assert_sp_array_equal( create a new BlockIndex for that array, with consolidated block indices. """ - _check_isinstance(left, right, pd.arrays.SparseArray) assert_numpy_array_equal(left.sp_values, right.sp_values, check_dtype=check_dtype) @@ -1876,7 +1875,6 @@ def makeCustomIndex( if unspecified, string labels will be generated. """ - if ndupe_l is None: ndupe_l = [1] * nlevels assert is_sequence(ndupe_l) and len(ndupe_l) <= nlevels @@ -2025,7 +2023,6 @@ def makeCustomDataframe( >> a=mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4) """ - assert c_idx_nlevels > 0 assert r_idx_nlevels > 0 assert r_idx_type is None or ( @@ -2229,7 +2226,6 @@ def can_connect(url, error_classes=None): Return True if no IOError (unable to connect) or URLError (bad url) was raised """ - if error_classes is None: error_classes = _get_default_network_errors() @@ -2603,7 +2599,6 @@ def test_parallel(num_threads=2, kwargs_list=None): https://github.com/scikit-image/scikit-image/pull/1519 """ - assert num_threads > 0 has_kwargs_list = kwargs_list is not None if has_kwargs_list: @@ -2685,7 +2680,6 @@ def set_timezone(tz: str): ... 
'EDT' """ - import os import time diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index 05ecccc67daef..ccc970fb453c2 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -99,7 +99,6 @@ def validate_argmin_with_skipna(skipna, args, kwargs): 'skipna' parameter is either an instance of ndarray or is None, since 'skipna' itself should be a boolean """ - skipna, args = process_skipna(skipna, args) validate_argmin(args, kwargs) return skipna @@ -113,7 +112,6 @@ def validate_argmax_with_skipna(skipna, args, kwargs): 'skipna' parameter is either an instance of ndarray or is None, since 'skipna' itself should be a boolean """ - skipna, args = process_skipna(skipna, args) validate_argmax(args, kwargs) return skipna @@ -151,7 +149,6 @@ def validate_argsort_with_ascending(ascending, args, kwargs): either integer type or is None, since 'ascending' itself should be a boolean """ - if is_integer(ascending) or ascending is None: args = (ascending,) + args ascending = True @@ -173,7 +170,6 @@ def validate_clip_with_axis(axis, args, kwargs): so check if the 'axis' parameter is an instance of ndarray, since 'axis' itself should either be an integer or None """ - if isinstance(axis, ndarray): args = (axis,) + args axis = None @@ -298,7 +294,6 @@ def validate_take_with_convert(convert, args, kwargs): ndarray or 'None', so check if the 'convert' parameter is either an instance of ndarray or is None """ - if isinstance(convert, ndarray) or convert is None: args = (convert,) + args convert = True diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 0a1a1376bfc8d..3f4acca8bce18 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -229,7 +229,6 @@ def load(fh, encoding: Optional[str] = None, is_verbose: bool = False): encoding : an optional encoding is_verbose : show exception output """ - try: fh.seek(0) if encoding is not None: diff --git a/pandas/conftest.py b/pandas/conftest.py index 821bec19d6115..41f77506bfbc8 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -118,7 +118,6 @@ def ip(): Will raise a skip if IPython is not installed. 
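# Usage sketch, not part of the patch: pd.unique (whose docstring is edited
# above) preserves order of appearance, unlike np.unique, which sorts.
import numpy as np
import pandas as pd

assert list(pd.unique(pd.Series([2, 1, 2, 3]))) == [2, 1, 3]
assert list(np.unique([2, 1, 2, 3])) == [1, 2, 3]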
""" - pytest.importorskip("IPython", minversion="6.0.0") from IPython.core.interactiveshell import InteractiveShell @@ -679,7 +678,6 @@ def any_nullable_int_dtype(request): * 'UInt64' * 'Int64' """ - return request.param diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c915895a8fc4a..02a979aea6c6b 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -85,7 +85,6 @@ def _ensure_data(values, dtype=None): values : ndarray pandas_dtype : str or dtype """ - # we check some simple dtypes first if is_object_dtype(dtype): return ensure_object(np.asarray(values)), "object" @@ -182,7 +181,6 @@ def _reconstruct_data(values, dtype, original): ------- Index for extension types, otherwise ndarray casted to dtype """ - if is_extension_array_dtype(dtype): values = dtype.construct_array_type()._from_sequence(values) elif is_bool_dtype(dtype): @@ -368,7 +366,6 @@ def unique(values): >>> pd.unique([('a', 'b'), ('b', 'a'), ('a', 'c'), ('b', 'a')]) array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object) """ - values = _ensure_arraylike(values) if is_extension_array_dtype(values): @@ -791,7 +788,6 @@ def duplicated(values, keep="first") -> np.ndarray: ------- duplicated : ndarray """ - values, _ = _ensure_data(values) ndtype = values.dtype.name f = getattr(htable, f"duplicated_{ndtype}") diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 81e1d84880f60..70e0a129c055f 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -35,7 +35,6 @@ def frame_apply( kwds=None, ): """ construct and return a row or column based frame apply object """ - axis = obj._get_axis_number(axis) klass: Type[FrameApply] if axis == 0: @@ -144,7 +143,6 @@ def agg_axis(self) -> "Index": def get_result(self): """ compute the results """ - # dispatch to agg if is_list_like(self.f) or is_dict_like(self.f): return self.obj.aggregate(self.f, axis=self.axis, *self.args, **self.kwds) @@ -193,7 +191,6 @@ def apply_empty_result(self): we will try to apply the function to an empty series in order to see if this is a reduction function """ - # we are not asked to reduce or infer reduction # so just return a copy of the existing object if self.result_type not in ["reduce", None]: @@ -396,7 +393,6 @@ def wrap_results_for_axis( self, results: ResType, res_index: "Index" ) -> "DataFrame": """ return the results for the rows """ - result = self.obj._constructor(data=results) if not isinstance(results[0], ABCSeries): @@ -457,7 +453,6 @@ def wrap_results_for_axis( def infer_to_same_shape(self, results: ResType, res_index: "Index") -> "DataFrame": """ infer the results to the same shape as the input object """ - result = self.obj._constructor(data=results) result = result.T diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 0e04354ae7c89..b095288acca90 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -701,7 +701,6 @@ def _set_categories(self, categories, fastpath=False): [a, c] Categories (2, object): [a, c] """ - if fastpath: new_dtype = CategoricalDtype._from_fastpath(categories, self.ordered) else: @@ -1227,7 +1226,6 @@ def shape(self): ------- shape : tuple """ - return tuple([len(self._codes)]) def shift(self, periods, fill_value=None): @@ -1384,7 +1382,6 @@ def isna(self): Categorical.notna : Boolean inverse of Categorical.isna. """ - ret = self._codes == -1 return ret @@ -1934,7 +1931,6 @@ def _repr_categories_info(self) -> str: """ Returns a string representation of the footer. 
""" - category_strs = self._repr_categories() dtype = str(self.categories.dtype) levheader = f"Categories ({len(self.categories)}, {dtype}): " @@ -2260,7 +2256,6 @@ def unique(self): Series.unique """ - # unlike np.unique, unique1d does not sort unique_codes = unique1d(self.codes) cat = self.copy() @@ -2320,7 +2315,6 @@ def is_dtype_equal(self, other): ------- bool """ - try: return hash(self.dtype) == hash(other.dtype) except (AttributeError, TypeError): diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 03c8e48c6e699..07aa8d49338c8 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -500,7 +500,6 @@ def __getitem__(self, key): This getitem defers to the underlying array, which by-definition can only handle list-likes, slices, and integer scalars """ - is_int = lib.is_integer(key) if lib.is_scalar(key) and not is_int: raise IndexError( @@ -892,7 +891,6 @@ def _maybe_mask_results(self, result, fill_value=iNaT, convert=None): This is an internal routine. """ - if self._hasnans: if convert: result = result.astype(convert) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index e0b6947394cc4..642ae6d4deacb 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -152,7 +152,6 @@ def safe_cast(values, dtype, copy: bool): ints. """ - try: return values.astype(dtype, casting="safe", copy=copy) except TypeError: @@ -600,7 +599,6 @@ def _maybe_mask_result(self, result, mask, other, op_name: str): other : scalar or array-like op_name : str """ - # if we have a float operand we are by-definition # a float result # or our op is a divide diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 8383b783d90e7..8141e2c78a7e2 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -624,7 +624,6 @@ def _addsub_int_array( ------- result : PeriodArray """ - assert op in [operator.add, operator.sub] if op is operator.sub: other = -other diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 8008805ddcf87..b17a4647ffc9f 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1503,7 +1503,6 @@ def make_sparse(arr, kind="block", fill_value=None, dtype=None, copy=False): ------- (sparse_values, index, fill_value) : (ndarray, SparseIndex, Scalar) """ - arr = com.values_from_object(arr) if arr.ndim > 1: diff --git a/pandas/core/arrays/sparse/scipy_sparse.py b/pandas/core/arrays/sparse/scipy_sparse.py index b67f2c9f52c76..eff9c03386a38 100644 --- a/pandas/core/arrays/sparse/scipy_sparse.py +++ b/pandas/core/arrays/sparse/scipy_sparse.py @@ -31,7 +31,6 @@ def _to_ijv(ss, row_levels=(0,), column_levels=(1,), sort_labels=False): def get_indexers(levels): """ Return sparse coords and dense labels for subset levels """ - # TODO: how to do this better? cleanly slice nonnull_labels given the # coord values_ilabels = [tuple(x[i] for i in levels) for x in nonnull_labels.index] @@ -90,7 +89,6 @@ def _sparse_series_to_coo(ss, row_levels=(0,), column_levels=(1,), sort_labels=F levels row_levels, column_levels as the row and column labels respectively. Returns the sparse_matrix, row and column labels. 
""" - import scipy.sparse if ss.index.nlevels < 2: diff --git a/pandas/core/common.py b/pandas/core/common.py index 00c7a41477017..550ce74de5357 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -337,7 +337,6 @@ def apply_if_callable(maybe_callable, obj, **kwargs): obj : NDFrame **kwargs """ - if callable(maybe_callable): return maybe_callable(obj, **kwargs) @@ -412,7 +411,6 @@ def random_state(state=None): ------- np.random.RandomState """ - if is_integer(state): return np.random.RandomState(state) elif isinstance(state, np.random.RandomState): diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index c26208d3b4465..c59952bea8dc0 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -599,7 +599,6 @@ def visit_Assign(self, node, **kwargs): might or might not exist in the resolvers """ - if len(node.targets) != 1: raise SyntaxError("can only assign a single expression") if not isinstance(node.targets[0], ast.Name): diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 9f209cccd5be6..19f151846a080 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -95,7 +95,6 @@ def _disallow_scalar_only_bool_ops(self): def prune(self, klass): def pr(left, right): """ create and return a new specialized BinOp from myself """ - if left is None: return right elif right is None: @@ -476,7 +475,6 @@ def _validate_where(w): ------ TypeError : An invalid data type was passed in for w (e.g. dict). """ - if not (isinstance(w, (PyTablesExpr, str)) or is_list_like(w)): raise TypeError( "where must be passed as a string, PyTablesExpr, " @@ -574,7 +572,6 @@ def __repr__(self) -> str: def evaluate(self): """ create and return the numexpr condition and filter """ - try: self.condition = self.terms.prune(ConditionBinOp) except AttributeError: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 6120bc92adbfc..011c09c9ca1ef 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -78,7 +78,6 @@ def maybe_convert_platform(values): """ try to do platform conversion, allow ndarray or list here """ - if isinstance(values, (list, tuple, range)): values = construct_1d_object_array_from_listlike(values) if getattr(values, "dtype", None) == np.object_: @@ -97,7 +96,6 @@ def is_nested_object(obj) -> bool: This may not be necessarily be performant. """ - if isinstance(obj, ABCSeries) and is_object_dtype(obj): if any(isinstance(v, ABCSeries) for v in obj.values): @@ -525,7 +523,6 @@ def _ensure_dtype_type(value, dtype): ------- object """ - # Start with exceptions in which we do _not_ cast to numpy types if is_extension_array_dtype(dtype): return value @@ -566,7 +563,6 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False): If False, scalar belongs to pandas extension types is inferred as object """ - dtype = np.object_ # a 1-element ndarray @@ -823,7 +819,6 @@ def astype_nansafe(arr, dtype, copy: bool = True, skipna: bool = False): ValueError The dtype was a datetime64/timedelta64 dtype, but it had no unit. 
""" - # dispatch on extension dtype if needed if is_extension_array_dtype(dtype): return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy) @@ -965,7 +960,6 @@ def soft_convert_objects( copy: bool = True, ): """ if we have an object dtype, try to coerce dates and/or numbers """ - validate_bool_kwarg(datetime, "datetime") validate_bool_kwarg(numeric, "numeric") validate_bool_kwarg(timedelta, "timedelta") @@ -1053,7 +1047,6 @@ def convert_dtypes( dtype new dtype """ - if convert_string or convert_integer or convert_boolean: try: inferred_dtype = lib.infer_dtype(input_array) @@ -1133,7 +1126,6 @@ def maybe_infer_to_datetimelike(value, convert_dates: bool = False): leave inferred dtype 'date' alone """ - # TODO: why not timedelta? if isinstance( value, (ABCDatetimeIndex, ABCPeriodIndex, ABCDatetimeArray, ABCPeriodArray) @@ -1373,7 +1365,6 @@ def find_common_type(types): numpy.find_common_type """ - if len(types) == 0: raise ValueError("no types given") @@ -1420,7 +1411,6 @@ def cast_scalar_to_array(shape, value, dtype=None): ndarray of shape, filled with value, of specified / inferred dtype """ - if dtype is None: dtype, fill_value = infer_dtype_from_scalar(value) else: diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index f8e14d1cbc9e9..c0420244f671e 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -92,7 +92,6 @@ def ensure_float(arr): float_arr : The original array cast to the float dtype if possible. Otherwise, the original array is returned. """ - if issubclass(arr.dtype.type, (np.integer, np.bool_)): arr = arr.astype(float) return arr @@ -132,7 +131,6 @@ def ensure_categorical(arr): cat_arr : The original array cast as a Categorical. If it already is a Categorical, we return as is. 
""" - if not is_categorical(arr): from pandas import Categorical @@ -325,7 +323,6 @@ def is_scipy_sparse(arr) -> bool: >>> is_scipy_sparse(pd.arrays.SparseArray([1, 2, 3])) False """ - global _is_scipy_sparse if _is_scipy_sparse is None: @@ -367,7 +364,6 @@ def is_categorical(arr) -> bool: >>> is_categorical(pd.CategoricalIndex([1, 2, 3])) True """ - return isinstance(arr, ABCCategorical) or is_categorical_dtype(arr) @@ -398,7 +394,6 @@ def is_datetime64_dtype(arr_or_dtype) -> bool: >>> is_datetime64_dtype([1, 2, 3]) False """ - return _is_dtype_type(arr_or_dtype, classes(np.datetime64)) @@ -434,7 +429,6 @@ def is_datetime64tz_dtype(arr_or_dtype) -> bool: >>> is_datetime64tz_dtype(s) True """ - if arr_or_dtype is None: return False return DatetimeTZDtype.is_dtype(arr_or_dtype) @@ -467,7 +461,6 @@ def is_timedelta64_dtype(arr_or_dtype) -> bool: >>> is_timedelta64_dtype('0 days') False """ - return _is_dtype_type(arr_or_dtype, classes(np.timedelta64)) @@ -498,7 +491,6 @@ def is_period_dtype(arr_or_dtype) -> bool: >>> is_period_dtype(pd.PeriodIndex([], freq="A")) True """ - # TODO: Consider making Period an instance of PeriodDtype if arr_or_dtype is None: return False @@ -534,7 +526,6 @@ def is_interval_dtype(arr_or_dtype) -> bool: >>> is_interval_dtype(pd.IntervalIndex([interval])) True """ - # TODO: Consider making Interval an instance of IntervalDtype if arr_or_dtype is None: return False @@ -568,7 +559,6 @@ def is_categorical_dtype(arr_or_dtype) -> bool: >>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3])) True """ - if arr_or_dtype is None: return False return CategoricalDtype.is_dtype(arr_or_dtype) @@ -602,7 +592,6 @@ def is_string_dtype(arr_or_dtype) -> bool: >>> is_string_dtype(pd.Series([1, 2])) False """ - # TODO: gh-15585: consider making the checks stricter. 
def condition(dtype) -> bool: return dtype.kind in ("O", "S", "U") and not is_excluded_dtype(dtype) @@ -641,7 +630,6 @@ def is_period_arraylike(arr) -> bool: >>> is_period_arraylike(pd.PeriodIndex(["2017-01-01"], freq="D")) True """ - if isinstance(arr, (ABCPeriodIndex, ABCPeriodArray)): return True elif isinstance(arr, (np.ndarray, ABCSeries)): @@ -673,7 +661,6 @@ def is_datetime_arraylike(arr) -> bool: >>> is_datetime_arraylike(pd.DatetimeIndex([1, 2, 3])) True """ - if isinstance(arr, ABCDatetimeIndex): return True elif isinstance(arr, (np.ndarray, ABCSeries)): @@ -711,7 +698,6 @@ def is_dtype_equal(source, target) -> bool: >>> is_dtype_equal(DatetimeTZDtype(tz="UTC"), "datetime64") False """ - try: source = _get_dtype(source) target = _get_dtype(target) @@ -770,7 +756,6 @@ def is_any_int_dtype(arr_or_dtype) -> bool: >>> is_any_int_dtype(pd.Index([1, 2.])) # float False """ - return _is_dtype_type(arr_or_dtype, classes(np.integer, np.timedelta64)) @@ -825,7 +810,6 @@ def is_integer_dtype(arr_or_dtype) -> bool: >>> is_integer_dtype(pd.Index([1, 2.])) # float False """ - return _is_dtype_type(arr_or_dtype, classes_and_not_datetimelike(np.integer)) @@ -882,7 +866,6 @@ def is_signed_integer_dtype(arr_or_dtype) -> bool: >>> is_signed_integer_dtype(np.array([1, 2], dtype=np.uint32)) # unsigned False """ - return _is_dtype_type(arr_or_dtype, classes_and_not_datetimelike(np.signedinteger)) @@ -982,7 +965,6 @@ def is_int64_dtype(arr_or_dtype) -> bool: >>> is_int64_dtype(np.array([1, 2], dtype=np.uint32)) # unsigned False """ - return _is_dtype_type(arr_or_dtype, classes(np.int64)) @@ -1137,7 +1119,6 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool: >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64)) True """ - return _is_dtype_type(arr_or_dtype, classes(np.datetime64, np.timedelta64)) @@ -1198,7 +1179,6 @@ def is_numeric_v_string_like(a, b): >>> is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) False """ - is_a_array = isinstance(a, np.ndarray) is_b_array = isinstance(b, np.ndarray) @@ -1260,7 +1240,6 @@ def is_datetimelike_v_numeric(a, b): >>> is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) False """ - if not hasattr(a, "dtype"): a = np.asarray(a) if not hasattr(b, "dtype"): @@ -1311,7 +1290,6 @@ def needs_i8_conversion(arr_or_dtype) -> bool: >>> needs_i8_conversion(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) True """ - if arr_or_dtype is None: return False return ( @@ -1358,7 +1336,6 @@ def is_numeric_dtype(arr_or_dtype) -> bool: >>> is_numeric_dtype(np.array([], dtype=np.timedelta64)) False """ - return _is_dtype_type( arr_or_dtype, classes_and_not_datetimelike(np.number, np.bool_) ) @@ -1392,7 +1369,6 @@ def is_string_like_dtype(arr_or_dtype) -> bool: >>> is_string_like_dtype(pd.Series([1, 2])) False """ - return _is_dtype(arr_or_dtype, lambda dtype: dtype.kind in ("S", "U")) @@ -1638,7 +1614,6 @@ def is_complex_dtype(arr_or_dtype) -> bool: >>> is_complex_dtype(np.array([1 + 1j, 5])) True """ - return _is_dtype_type(arr_or_dtype, classes(np.complexfloating)) @@ -1657,7 +1632,6 @@ def _is_dtype(arr_or_dtype, condition) -> bool: bool """ - if arr_or_dtype is None: return False try: @@ -1686,7 +1660,6 @@ def _get_dtype(arr_or_dtype) -> DtypeObj: ------ TypeError : The passed in object is None. 
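# Usage sketch, not part of the patch: the is_*_dtype helpers edited above are
# exposed publicly under pd.api.types.
import numpy as np
import pandas as pd

assert pd.api.types.is_integer_dtype(np.array([1, 2]))
assert not pd.api.types.is_integer_dtype(np.array([1.0, 2.0]))
assert pd.api.types.is_numeric_dtype(pd.Series([1.5, 2.5]))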
""" - if arr_or_dtype is None: raise TypeError("Cannot deduce dtype from null object") @@ -1717,7 +1690,6 @@ def _is_dtype_type(arr_or_dtype, condition) -> bool: ------- bool : if the condition is satisfied for the arr_or_dtype """ - if arr_or_dtype is None: return condition(type(None)) @@ -1767,7 +1739,6 @@ def infer_dtype_from_object(dtype): ------- dtype_object : The extracted numpy dtype.type-style object. """ - if isinstance(dtype, type) and issubclass(dtype, np.generic): # Type object from a dtype return dtype @@ -1827,7 +1798,6 @@ def _validate_date_like_dtype(dtype) -> None: ValueError : The dtype is an illegal date-like dtype (e.g. the the frequency provided is too specific) """ - try: typ = np.datetime_data(dtype)[0] except ValueError as e: diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index fdc2eeb34b4ed..e53eb3b4d8e71 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -38,7 +38,6 @@ def get_dtype_kinds(l): ------- a set of kinds that exist in this list of arrays """ - typs = set() for arr in l: @@ -85,7 +84,6 @@ def concat_compat(to_concat, axis: int = 0): ------- a single array, preserving the combined dtypes """ - # filter empty arrays # 1-d dtypes always are included here def is_nonempty(x) -> bool: @@ -153,7 +151,6 @@ def concat_categorical(to_concat, axis: int = 0): Categorical A single array, preserving the combined dtypes """ - # we could have object blocks and categoricals here # if we only have a single categoricals then combine everything # else its a non-compat categorical @@ -381,7 +378,6 @@ def concat_datetime(to_concat, axis=0, typs=None): ------- a single array, preserving the combined dtypes """ - if typs is None: typs = get_dtype_kinds(to_concat) @@ -466,7 +462,6 @@ def _concat_sparse(to_concat, axis=0, typs=None): ------- a single array, preserving the combined dtypes """ - from pandas.core.arrays import SparseArray fill_values = [x.fill_value for x in to_concat if isinstance(x, SparseArray)] diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 8aaebe89871b6..d93ad973ff02d 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -831,7 +831,6 @@ def __new__(cls, freq=None): ---------- freq : frequency """ - if isinstance(freq, PeriodDtype): return freq @@ -930,7 +929,6 @@ def is_dtype(cls, dtype) -> bool: Return a boolean if we if the passed type is an actual dtype that we can match (via string or type) """ - if isinstance(dtype, str): # PeriodDtype can be instantiated from freq string like "U", # but doesn't regard freq str like "U" as dtype. 
@@ -1139,7 +1137,6 @@ def is_dtype(cls, dtype) -> bool: Return a boolean if we if the passed type is an actual dtype that we can match (via string or type) """ - if isinstance(dtype, str): if dtype.lower().startswith("interval"): try: diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index a9cd696633273..56b880dca1241 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -65,7 +65,6 @@ def is_number(obj) -> bool: >>> pd.api.types.is_number("5") False """ - return isinstance(obj, (Number, np.number)) @@ -91,7 +90,6 @@ def _iterable_not_string(obj) -> bool: >>> _iterable_not_string(1) False """ - return isinstance(obj, abc.Iterable) and not isinstance(obj, str) @@ -124,7 +122,6 @@ def is_file_like(obj) -> bool: >>> is_file_like([1, 2, 3]) False """ - if not (hasattr(obj, "read") or hasattr(obj, "write")): return False @@ -177,7 +174,6 @@ def is_re_compilable(obj) -> bool: >>> is_re_compilable(1) False """ - try: re.compile(obj) except TypeError: @@ -215,7 +211,6 @@ def is_array_like(obj) -> bool: >>> is_array_like(("a", "b")) False """ - return is_list_like(obj) and hasattr(obj, "dtype") @@ -321,7 +316,6 @@ def is_named_tuple(obj) -> bool: >>> is_named_tuple((1, 2)) False """ - return isinstance(obj, tuple) and hasattr(obj, "_fields") @@ -386,7 +380,6 @@ def is_sequence(obj) -> bool: >>> is_sequence(iter(l)) False """ - try: iter(obj) # Can iterate over it. len(obj) # Has a length associated with it. diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 0bc754b3e8fb3..ee74b02af9516 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -430,7 +430,6 @@ def array_equivalent(left, right, strict_nan: bool = False) -> bool: ... np.array([1, 2, np.nan])) False """ - left, right = np.asarray(left), np.asarray(right) # shape compat @@ -504,7 +503,6 @@ def _infer_fill_value(val): scalar/ndarray/list-like if we are a NaT, return the correct dtyped element to provide proper block construction """ - if not is_list_like(val): val = [val] val = np.array(val, copy=False) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 432547fdb2bfa..b7bec7f87e6e1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5013,7 +5013,6 @@ def sort_index( sorted_obj : DataFrame or None DataFrame with sorted index if inplace=False, None otherwise. """ - # TODO: this can be combined with Series.sort_index impl as # almost identical @@ -7040,7 +7039,6 @@ def applymap(self, func) -> "DataFrame": 0 1.000000 4.494400 1 11.262736 20.857489 """ - # if we have a dtype == 'M8[ns]', provide boxed values def infer(x): if x.empty: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 08b48cddde929..934c4c6e92bbe 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -264,7 +264,6 @@ def attrs(self, value: Mapping[Optional[Hashable], Any]) -> None: def _validate_dtype(self, dtype): """ validate the passed dtype """ - if dtype is not None: dtype = pandas_dtype(dtype) @@ -356,7 +355,6 @@ def _construct_axes_from_arguments( supplied; useful to distinguish when a user explicitly passes None in scenarios where None has special meaning. 
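# Usage sketch, not part of the patch: the inference helpers edited above are
# also public via pd.api.types.
import io
from collections import namedtuple

import pandas as pd

assert pd.api.types.is_file_like(io.StringIO("data"))
Point = namedtuple("Point", ["x", "y"])
assert pd.api.types.is_named_tuple(Point(1, 2))
assert not pd.api.types.is_named_tuple((1, 2))  # plain tuple lacks _fields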
""" - # construct the args args = list(args) for a in self._AXIS_ORDERS: @@ -2251,7 +2249,6 @@ def to_json( "data": [{"index": "row 1", "col 1": "a", "col 2": "b"}, {"index": "row 2", "col 1": "c", "col 2": "d"}]}' """ - from pandas.io import json if date_format is None and orient == "table": @@ -3087,7 +3084,6 @@ def to_csv( >>> df.to_csv('out.zip', index=False, ... compression=compression_opts) # doctest: +SKIP """ - df = self if isinstance(self, ABCDataFrame) else self.to_frame() from pandas.io.formats.csvs import CSVFormatter @@ -3166,7 +3162,6 @@ def _maybe_update_cacher( verify_is_copy : bool, default True Provide is_copy checks. """ - cacher = getattr(self, "_cacher", None) if cacher is not None: ref = cacher[1]() @@ -3580,7 +3575,6 @@ def _check_setitem_copy(self, stacklevel=4, t="setting", force=False): df.iloc[0:5]['group'] = 'a' """ - # return early if the check is not needed if not (force or self._is_copy): return @@ -4422,7 +4416,6 @@ def _reindex_with_indexers( allow_dups: bool_t = False, ) -> FrameOrSeries: """allow_dups indicates an internal call here """ - # reindex doing multiple operations on different axes if indicated new_data = self._data for axis in sorted(reindexers.keys()): @@ -4618,7 +4611,6 @@ def head(self: FrameOrSeries, n: int = 5) -> FrameOrSeries: 4 monkey 5 parrot """ - return self.iloc[:n] def tail(self: FrameOrSeries, n: int = 5) -> FrameOrSeries: @@ -4691,7 +4683,6 @@ def tail(self: FrameOrSeries, n: int = 5) -> FrameOrSeries: 7 whale 8 zebra """ - if n == 0: return self.iloc[0:0] return self.iloc[-n:] @@ -4806,7 +4797,6 @@ def sample( falcon 2 2 10 fish 0 0 8 """ - if axis is None: axis = self._stat_axis_number @@ -5092,7 +5082,6 @@ def __getattr__(self, name: str): """After regular attribute access, try looking up the name This allows simpler access to columns for interactive use. """ - # Note: obj.x will always call obj.__getattribute__('x') prior to # calling obj.__getattr__('x'). @@ -5111,7 +5100,6 @@ def __setattr__(self, name: str, value) -> None: """After regular attribute access, try setting the name This allows simpler access to columns for interactive use. """ - # first try regular attribute access via __getattribute__, so that # e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify # the same attribute. @@ -5214,7 +5202,6 @@ def _is_numeric_mixed_type(self): def _check_inplace_setting(self, value) -> bool_t: """ check whether we allow in-place setting with this type of value """ - if self._is_mixed_type: if not self._is_numeric_mixed_type: @@ -7922,7 +7909,6 @@ def resample( 2000-01-03 32 150 2000-01-04 36 90 """ - from pandas.core.resample import get_resampler axis = self._get_axis_number(axis) @@ -8936,7 +8922,6 @@ def tshift( attributes of the index. If neither of those attributes exist, a ValueError is thrown """ - index = self._get_axis(axis) if freq is None: freq = getattr(index, "freq", None) @@ -9925,7 +9910,6 @@ def _add_numeric_operations(cls): """ Add the operations to the cls; evaluate the doc strings again """ - axis_descr, name, name2 = _doc_parms(cls) cls.any = _make_logical_function( @@ -10163,7 +10147,6 @@ def _add_series_or_dataframe_operations(cls): Add the series or dataframe only operations to the cls; evaluate the doc strings again. 
""" - from pandas.core.window import EWM, Expanding, Rolling, Window @Appender(Rolling.__doc__) @@ -10277,7 +10260,6 @@ def _find_valid_index(self, how: str): ------- idx_first_valid : type of index """ - idxpos = find_valid_index(self._values, how) if idxpos is None: return None diff --git a/pandas/core/groupby/categorical.py b/pandas/core/groupby/categorical.py index 399ed9ddc9ba1..c71ebee397bbd 100644 --- a/pandas/core/groupby/categorical.py +++ b/pandas/core/groupby/categorical.py @@ -41,7 +41,6 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool): Categorical or None If we are observed, return the original categorical, otherwise None """ - # we only care about observed values if observed: unique_codes = unique1d(c.codes) @@ -90,7 +89,6 @@ def recode_from_groupby(c: Categorical, sort: bool, ci): ------- CategoricalIndex """ - # we re-order to the original category orderings if sort: return ci.set_categories(c.categories) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index f194c774cf329..37b6429167646 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1571,7 +1571,6 @@ def filter(self, func, dropna=True, *args, **kwargs): 3 bar 4 1.0 5 bar 6 9.0 """ - indices = [] obj = self._selected_obj @@ -1626,7 +1625,6 @@ def _gotitem(self, key, ndim: int, subset=None): subset : object, default None subset to act on """ - if ndim == 2: if subset is None: subset = self.obj @@ -1844,7 +1842,6 @@ def nunique(self, dropna: bool = True): 4 ham 5 x 5 ham 5 y """ - obj = self._selected_obj def groupby_series(obj, col=None): diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 153bf386d4f33..426b3b47d9530 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1174,7 +1174,6 @@ def count(self): Series or DataFrame Count of values within each group. """ - # defined here for API doc raise NotImplementedError @@ -1277,7 +1276,6 @@ def std(self, ddof: int = 1): Series or DataFrame Standard deviation of values within each group. """ - # TODO: implement at Cython level? return np.sqrt(self.var(ddof=ddof)) @@ -1458,7 +1456,6 @@ def ohlc(self) -> DataFrame: DataFrame Open, high, low and close values within each group. """ - return self._apply_to_column_groupbys(lambda x: x._cython_agg_general("ohlc")) @Appender(DataFrame.describe.__doc__) @@ -1764,7 +1761,6 @@ def nth(self, n: Union[int, List[int]], dropna: Optional[str] = None) -> DataFra 1 1 2.0 4 2 5.0 """ - valid_containers = (set, list, tuple) if not isinstance(n, (valid_containers, int)): raise TypeError("n needs to be an int or a list/set/tuple of ints") @@ -2034,7 +2030,6 @@ def ngroup(self, ascending: bool = True): 5 0 dtype: int64 """ - with _group_selection_context(self): index = self._selected_obj.index result = Series(self.grouper.group_info[0], index) @@ -2095,7 +2090,6 @@ def cumcount(self, ascending: bool = True): 5 0 dtype: int64 """ - with _group_selection_context(self): index = self._selected_obj.index cumcounts = self._cumcount_array(ascending=ascending) @@ -2348,7 +2342,6 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): Series or DataFrame Object shifted within each group. 
""" - if freq is not None or axis != 0 or not isna(fill_value): return self.apply(lambda x: x.shift(periods, freq, axis, fill_value)) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index f0c6eedf5cee4..8a42a8fa297cd 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -130,7 +130,6 @@ def _get_grouper(self, obj, validate: bool = True): ------- a tuple of binner, grouper, obj (possibly sorted) """ - self._set_grouper(obj) self.grouper, _, self.obj = get_grouper( self.obj, diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 4e593ce543ea6..63087672d1365 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -433,7 +433,6 @@ def _cython_operation( Names is only useful when dealing with 2D results, like ohlc (see self._name_functions). """ - assert kind in ["transform", "aggregate"] orig_values = values @@ -748,7 +747,6 @@ def __init__( @cache_readonly def groups(self): """ dict {group name -> group labels} """ - # this is mainly for compat # GH 3881 result = { diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 719bf13cbd313..f3bae63aa7e03 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -912,7 +912,6 @@ def _format_data(self, name=None) -> str_t: """ Return the formatted data as a unicode string. """ - # do we want to justify (only do so for non-objects) is_justify = True @@ -1003,7 +1002,6 @@ def to_native_types(self, slicer=None, **kwargs): numpy.ndarray Formatted values. """ - values = self if slicer is not None: values = values[slicer] @@ -1092,7 +1090,6 @@ def to_series(self, index=None, name=None): Series The dtype will be based on the type of the Index values. """ - from pandas import Series if index is None: @@ -1153,7 +1150,6 @@ def to_frame(self, index: bool = True, name=None): 1 Bear 2 Cow """ - from pandas import DataFrame if name is None: @@ -1294,7 +1290,6 @@ def set_names(self, names, level=None, inplace: bool = False): ( 'cobra', 2019)], names=['species', 'year']) """ - if level is not None and not isinstance(self, ABCMultiIndex): raise ValueError("Level must be None for non-MultiIndex") @@ -2548,7 +2543,6 @@ def _union(self, other, sort): ------- Index """ - if not len(other) or self.equals(other): return self._get_reconciled_name_object(other) @@ -3306,7 +3300,6 @@ def _can_reindex(self, indexer): ------ ValueError if its a duplicate axis """ - # trying to reindex on an axis with duplicates if not self.is_unique and len(indexer): raise ValueError("cannot reindex from a duplicate axis") @@ -3391,7 +3384,6 @@ def _reindex_non_unique(self, target): Indices of output values in original index. """ - target = ensure_index(target) indexer, missing = self.get_indexer_non_unique(target) check = indexer != -1 @@ -4182,7 +4174,6 @@ def append(self, other): ------- appended : Index """ - to_concat = [self] if isinstance(other, (list, tuple)): @@ -4725,7 +4716,6 @@ def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]: dict {group name -> group labels} """ - # TODO: if we are a MultiIndex, we can do better # that converting to tuples if isinstance(values, ABCMultiIndex): @@ -4757,7 +4747,6 @@ def map(self, mapper, na_action=None): If the function returns a tuple with more than one element a MultiIndex will be returned. 
""" - from pandas.core.indexes.multi import MultiIndex new_values = super()._map_values(mapper, na_action=na_action) @@ -4923,7 +4912,6 @@ def _maybe_cast_indexer(self, key): If we have a float key and are not a floating index, then try to cast to an int if equivalent. """ - if not self.is_floating(): return com.cast_scalar_indexer(key) return key @@ -5740,7 +5728,6 @@ def _try_convert_to_int_array( ------ ValueError if the conversion was not successful. """ - if not is_unsigned_integer_dtype(dtype): # skip int64 conversion attempt if uint-like dtype is passed, as # this could return Int64Index when UInt64Index is what's desired diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 7373f41daefa4..bb62d500311df 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -215,7 +215,6 @@ def _create_from_codes(self, codes, dtype=None, name=None): ------- CategoricalIndex """ - if dtype is None: dtype = self.dtype if name is None: diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 941b6c876bb36..d505778d18c52 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -386,7 +386,6 @@ def _convert_scalar_indexer(self, key, kind: str): key : label of the slice bound kind : {'loc', 'getitem'} """ - assert kind in ["loc", "getitem"] if not is_scalar(key): @@ -556,7 +555,6 @@ def _concat_same_dtype(self, to_concat, name): """ Concatenate to_concat which has the same class. """ - new_data = type(self._data)._concat_same_type(to_concat) return self._simple_new(new_data, name=name) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index b67d0dcea0ac6..e303e487b1a7d 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -939,7 +939,6 @@ def date_range( DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D') """ - if freq is None and com.any_none(periods, start, end): freq = "D" diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 04b4b275bf90a..daccb35864e98 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -39,7 +39,6 @@ def inherit_from_data(name: str, delegate, cache: bool = False, wrap: bool = Fal ------- attribute, method, property, or cache_readonly """ - attr = getattr(delegate, name) if isinstance(attr, property): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 9c4cd6cf72d35..6ea4250e4acf4 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -550,7 +550,6 @@ def _can_reindex(self, indexer: np.ndarray) -> None: ------ ValueError if its a duplicate axis """ - # trying to reindex on an axis with duplicates if self.is_overlapping and len(indexer): raise ValueError("cannot reindex from an overlapping axis") diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 8a5bb3002213d..6fa42804d2e39 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -139,7 +139,6 @@ def _codes_to_ints(self, codes): int, or 1-dimensional array of dtype object Integer(s) representing one combination (each). """ - # Shift the representation of each level by the pre-calculated number # of bits. 
Since this can overflow uint64, first make sure we are # working with Python integers: @@ -1115,7 +1114,6 @@ def _nbytes(self, deep: bool = False) -> int: *this is in internal routine* """ - # for implementations with no useful getsizeof (PyPy) objsize = 24 @@ -1405,7 +1403,6 @@ def is_monotonic_increasing(self) -> bool: return if the index is monotonic increasing (only equal or increasing) values. """ - if all(x.is_monotonic for x in self.levels): # If each level is sorted, we can operate on the codes directly. GH27495 return libalgos.is_lexsorted( @@ -1466,7 +1463,6 @@ def _hashed_indexing_key(self, key): ----- we need to stringify if we have mixed levels """ - if not isinstance(key, tuple): return hash_tuples(key) @@ -1526,7 +1522,6 @@ def _get_level_values(self, level, unique=False): ------- values : ndarray """ - lev = self.levels[level] level_codes = self.codes[level] name = self._names[level] @@ -1609,7 +1604,6 @@ def to_frame(self, index=True, name=None): -------- DataFrame """ - from pandas import DataFrame if name is not None: @@ -1736,7 +1730,6 @@ def _sort_levels_monotonic(self): ('b', 'bb')], ) """ - if self.is_lexsorted() and self.is_monotonic: return self @@ -1805,7 +1798,6 @@ def remove_unused_levels(self): >>> mi2.levels FrozenList([[1], ['a', 'b']]) """ - new_levels = [] new_codes = [] @@ -1870,7 +1862,6 @@ def __reduce__(self): def __setstate__(self, state): """Necessary for making this object picklable""" - if isinstance(state, dict): levels = state.get("levels") codes = state.get("codes") @@ -2486,7 +2477,6 @@ def get_slice_bound( MultiIndex.get_locs : Get location for a label/slice/list/mask or a sequence of such. """ - if not isinstance(label, tuple): label = (label,) return self._partial_tup_index(label, side=side) @@ -2596,7 +2586,6 @@ def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: -------- Index.get_loc : The get_loc method for (single-level) index. """ - if is_scalar(key) and isna(key): return -1 else: @@ -2751,7 +2740,6 @@ def get_loc_level(self, key, level=0, drop_level: bool = True): >>> mi.get_loc_level(['b', 'e']) (1, None) """ - # different name to distinguish from maybe_droplevels def maybe_mi_droplevels(indexer, levels, drop_level: bool): if not drop_level: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 736db6b963adf..7d6ef11719b3a 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -453,7 +453,6 @@ def split_and_operate(self, mask, f, inplace: bool): ------- list of blocks """ - if mask is None: mask = np.broadcast_to(True, shape=self.shape) @@ -510,7 +509,6 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"] def downcast(self, dtypes=None): """ try to downcast each item to the dict of dtypes if present """ - # turn it off completely if dtypes is False: return self @@ -654,7 +652,6 @@ def convert( of the block (if copy = True) by definition we are not an ObjectBlock here! """ - return self.copy() if copy else self def _can_hold_element(self, element: Any) -> bool: @@ -700,7 +697,6 @@ def replace( blocks here this is just a call to putmask. regex is not used here. It is used in ObjectBlocks. It is here for API compatibility. 
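# Usage sketch, not part of the patch: remove_unused_levels, documented above,
# drops level values that are no longer referenced after slicing.
import pandas as pd

mi = pd.MultiIndex.from_product([range(2), list("ab")])
sliced = mi[2:]
assert sliced.levels[0].tolist() == [0, 1]  # unused 0 still in the levels
assert sliced.remove_unused_levels().levels[0].tolist() == [1]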
""" - inplace = validate_bool_kwarg(inplace, "inplace") original_to_replace = to_replace @@ -936,7 +932,6 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False) ------- a list of new blocks, the result of the putmask """ - new_values = self.values if inplace else self.values.copy() new = getattr(new, "values", new) @@ -1046,7 +1041,6 @@ def coerce_to_target_dtype(self, other): we can also safely try to coerce to the same dtype and will receive the same block """ - # if we cannot then coerce to object dtype, _ = infer_dtype_from(other, pandas_dtype=True) @@ -1179,7 +1173,6 @@ def _interpolate_with_fill( downcast=None, ): """ fillna but using the interpolate machinery """ - inplace = validate_bool_kwarg(inplace, "inplace") # if we are coercing, then don't force the conversion @@ -1223,7 +1216,6 @@ def _interpolate( **kwargs, ): """ interpolate using scipy wrappers """ - inplace = validate_bool_kwarg(inplace, "inplace") data = self.values if inplace else self.values.copy() @@ -1271,7 +1263,6 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None): Take values according to indexer and return them as a block.bb """ - # algos.take_nd dispatches for DatetimeTZBlock, CategoricalBlock # so need to preserve types # sparse is treated like an ndarray, but needs .get_values() shaping @@ -1310,7 +1301,6 @@ def diff(self, n: int, axis: int = 1) -> List["Block"]: def shift(self, periods, axis=0, fill_value=None): """ shift the block by periods, possibly upcast """ - # convert integer to float if necessary. need to do a lot more than # that, handle boolean etc also new_values, fill_value = maybe_upcast(self.values, fill_value) @@ -1570,7 +1560,6 @@ def _replace_coerce( ------- A new block if there is anything to replace or the original block. """ - if mask.any(): if not regex: self = self.coerce_to_target_dtype(value) @@ -1852,7 +1841,6 @@ def _can_hold_element(self, element: Any) -> bool: def _slice(self, slicer): """ return a slice of my values """ - # slice the category # return same dims as we currently have @@ -2055,7 +2043,6 @@ def to_native_types( **kwargs, ): """ convert to our native types format, slicing if desired """ - values = self.values if slicer is not None: values = values[:, slicer] @@ -2242,7 +2229,6 @@ def to_native_types( self, slicer=None, na_rep=None, date_format=None, quoting=None, **kwargs ): """ convert to our native types format, slicing if desired """ - values = self.values i8values = self.values.view("i8") @@ -2520,7 +2506,6 @@ def should_store(self, value): def to_native_types(self, slicer=None, na_rep=None, quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ - values = self.values if slicer is not None: values = values[:, slicer] @@ -2613,7 +2598,6 @@ def convert( can return multiple blocks! 
""" - # operate column-by-column def f(mask, val, idx): shape = val.shape @@ -2915,7 +2899,6 @@ def to_dense(self): def to_native_types(self, slicer=None, na_rep="", quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ - values = self.values if slicer is not None: # Categorical is always one dimension @@ -3051,7 +3034,6 @@ def make_block(values, placement, klass=None, ndim=None, dtype=None): def _extend_blocks(result, blocks=None): """ return a new extended blocks, given the result """ - if blocks is None: blocks = [] if isinstance(result, list): @@ -3147,7 +3129,6 @@ def _putmask_smart(v, mask, n): -------- ndarray.putmask """ - # we cannot use np.asarray() here as we cannot have conversions # that numpy does when numeric are mixed with strings diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 9fd7ff073afdc..515e1bcd761b6 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -408,7 +408,6 @@ def _trim_join_unit(join_unit, length): Extra items that didn't fit are returned as a separate block. """ - if 0 not in join_unit.indexers: extra_indexers = join_unit.indexers diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 798386825d802..9dd4312a39525 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -78,7 +78,6 @@ def masked_rec_array_to_mgr(data, index, columns, dtype, copy: bool): """ Extract from a masked rec array and create the manager. """ - # essentially process a record array then fill it fill_value = data.fill_value fdata = ma.getdata(data) @@ -555,7 +554,6 @@ def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): tuple arrays, columns """ - if columns is None: gen = (list(x.keys()) for x in data) sort = not any(isinstance(d, dict) for d in data) @@ -603,7 +601,6 @@ def sanitize_index(data, index: Index): Sanitize an index type to return an ndarray of the underlying, pass through a non-Index. 
""" - if len(data) != len(index): raise ValueError("Length of values does not match length of index") diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 0ec471cf366fe..3dc7dd7d81530 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -357,7 +357,6 @@ def apply(self, f, filter=None, **kwargs): ------- BlockManager """ - result_blocks = [] # filter kwarg is used in replace-* family of methods @@ -453,7 +452,6 @@ def quantile( ------- Block Manager (new object) """ - # Series dispatches to DataFrame for quantile, which allows us to # simplify some of the code here and in the blocks assert self.ndim >= 2 @@ -569,7 +567,6 @@ def replace(self, value, **kwargs): def replace_list(self, src_list, dest_list, inplace=False, regex=False): """ do a list replace """ - inplace = validate_bool_kwarg(inplace, "inplace") # figure out our mask a-priori to avoid repeated replacements @@ -1243,7 +1240,6 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): ------- new_blocks : list of Block """ - allow_fill = fill_tuple is not None sl_type, slobj, sllen = _preprocess_slice_or_indexer( @@ -1774,7 +1770,6 @@ def _simple_blockify(tuples, dtype): def _multi_blockify(tuples, dtype=None): """ return an array of blocks that potentially have different dtypes """ - # group by dtype grouper = itertools.groupby(tuples, lambda x: x[2].dtype) @@ -1840,7 +1835,6 @@ def _consolidate(blocks): """ Merge blocks having same dtype, exclude non-consolidating blocks """ - # sort by _can_consolidate, dtype gkey = lambda x: x._consolidate_key grouper = itertools.groupby(sorted(blocks, key=gkey), gkey) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 2bf2be082f639..422afd061762b 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -224,7 +224,6 @@ def _maybe_get_mask( ------- Optional[np.ndarray] """ - if mask is None: if is_bool_dtype(values.dtype) or is_integer_dtype(values.dtype): # Boolean data cannot contain nulls, so signal via mask being None @@ -279,7 +278,6 @@ def _get_values( fill_value : Any fill value used """ - # In _get_values is only called from within nanops, and in all cases # with scalar fill_value. 
This guarantee is important for the # maybe_upcast_putmask call below @@ -338,7 +336,6 @@ def _na_ok_dtype(dtype) -> bool: def _wrap_results(result, dtype: Dtype, fill_value=None): """ wrap our results if needed """ - if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): if fill_value is None: # GH#24293 @@ -833,7 +830,6 @@ def nansem( >>> nanops.nansem(s) 0.5773502691896258 """ - # This checks if non-numeric-like data is passed with numeric_only=False # and raises a TypeError otherwise nanvar(values, axis, skipna, ddof=ddof, mask=mask) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 0312c11a6d590..f3c1a609d50a1 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -254,7 +254,6 @@ def _get_opstr(op): ------- op_str : string or None """ - return { operator.add: "+", radd: "+", @@ -430,7 +429,6 @@ def column_op(a, b): def _align_method_SERIES(left, right, align_asobject=False): """ align lhs and rhs Series """ - # ToDo: Different from _align_method_FRAME, list, tuple and ndarray # are not coerced here # because Series has inconsistencies described in #13637 diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 3302ed9c219e6..5d53856729d0c 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -175,7 +175,6 @@ def arithmetic_op( ndarrray or ExtensionArray Or a 2-tuple of these in the case of divmod or rdivmod. """ - from pandas.core.ops import maybe_upcast_for_op # NB: We assume that extract_array has already been called @@ -218,7 +217,6 @@ def comparison_op( ------- ndarrray or ExtensionArray """ - # NB: We assume extract_array has already been called on left and right lvalues = left rvalues = right @@ -322,7 +320,6 @@ def logical_op( ------- ndarrray or ExtensionArray """ - fill_int = lambda x: x def fill_bool(x, left=None): diff --git a/pandas/core/ops/common.py b/pandas/core/ops/common.py index f4b16cf4a0cf2..5c83591b0e71e 100644 --- a/pandas/core/ops/common.py +++ b/pandas/core/ops/common.py @@ -43,7 +43,6 @@ def _unpack_zerodim_and_defer(method, name: str): ------- method """ - is_cmp = name.strip("__") in {"eq", "ne", "lt", "le", "gt", "ge"} @wraps(method) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 94ff1f0056663..98910a9baf962 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -183,7 +183,6 @@ def _get_binner(self): Create the BinGrouper, assume that self.set_grouper(obj) has already been called. """ - binner, bins, binlabels = self._get_binner_for_time() assert len(bins) == len(binlabels) bin_grouper = BinGrouper(bins, binlabels, indexer=self.groupby.indexer) @@ -345,7 +344,6 @@ def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs): """ Re-evaluate the obj with a groupby aggregation. 
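# Usage sketch, not part of the patch: the Series alignment logic touched
# above is what makes arithmetic align on the index; the flexible methods
# accept fill_value to avoid NaN where labels do not overlap.
import pandas as pd

s1 = pd.Series([1, 2], index=["a", "b"])
s2 = pd.Series([10], index=["b"])
print(s1 + s2)                   # a: NaN, b: 12.0
print(s1.add(s2, fill_value=0))  # a: 1.0, b: 12.0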
""" - if grouper is None: self._set_binner() grouper = self.grouper @@ -397,7 +395,6 @@ def _apply_loffset(self, result): result : Series or DataFrame the result of resample """ - needs_offset = ( isinstance(self.loffset, (DateOffset, timedelta, np.timedelta64)) and isinstance(result.index, DatetimeIndex) @@ -1158,7 +1155,6 @@ def _downsample(self, how, **kwargs): how : string / cython mapped function **kwargs : kw args passed to how function """ - # we may need to actually resample as if we are timestamps if self.kind == "timestamp": return super()._downsample(how, **kwargs) @@ -1202,7 +1198,6 @@ def _upsample(self, method, limit=None, fill_value=None): .fillna """ - # we may need to actually resample as if we are timestamps if self.kind == "timestamp": return super()._upsample(method, limit=limit, fill_value=fill_value) @@ -1277,7 +1272,6 @@ def get_resampler_for_grouping( """ Return our appropriate resampler when grouping as well. """ - # .resample uses 'on' similar to how .groupby uses 'key' kwargs["key"] = kwargs.pop("on", None) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 480c5279ad3f6..49ac1b6cfa52b 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -108,7 +108,6 @@ def _groupby_and_merge( check_duplicates: bool, default True should we check & clean duplicates """ - pieces = [] if not isinstance(by, (list, tuple)): by = [by] diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index b047e163c5565..b04e4e1ac4d48 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -567,7 +567,6 @@ def crosstab( b 0 1 0 c 0 0 0 """ - index = com.maybe_make_list(index) columns = com.maybe_make_list(columns) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index a18b45a077be0..e499158a13b0c 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -513,7 +513,6 @@ def _format_labels( bins, precision: int, right: bool = True, include_lowest: bool = False, dtype=None ): """ based on the dtype, return our labels """ - closed = "right" if right else "left" if is_datetime64tz_dtype(dtype): @@ -544,7 +543,6 @@ def _preprocess_for_cut(x): input to array, strip the index information and store it separately """ - # Check that the passed array is a Pandas or Numpy object # We don't want to strip away a Pandas data-type here (e.g. datetimetz) ndim = getattr(x, "ndim", None) diff --git a/pandas/core/series.py b/pandas/core/series.py index 946caaac97838..24e794014a15f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -397,7 +397,6 @@ def _set_axis(self, axis, labels, fastpath: bool = False) -> None: """ Override generic, we want to set the _typ here. """ - if not fastpath: labels = ensure_index(labels) @@ -541,7 +540,6 @@ def _internal_get_values(self): numpy.ndarray Data of the Series. """ - return self._data.get_values() # ops @@ -1403,7 +1401,6 @@ def to_string( str or None String representation of Series if ``buf=None``, otherwise None. 
""" - formatter = fmt.SeriesFormatter( self, name=name, @@ -2172,7 +2169,6 @@ def quantile(self, q=0.5, interpolation="linear"): 0.75 3.25 dtype: float64 """ - validate_percentile(q) # We dispatch to DataFrame so that core.internals only has to worry @@ -2584,7 +2580,6 @@ def _binop(self, other, func, level=None, fill_value=None): ------- Series """ - if not isinstance(other, Series): raise AssertionError("Other operand must be Series") diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 51c154aa47518..5496eca46b992 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -376,7 +376,6 @@ def compress_group_index(group_index, sort: bool = True): space can be huge, so this function compresses it, by computing offsets (comp_ids) into the list of unique labels (obs_group_ids). """ - size_hint = min(len(group_index), hashtable._SIZE_HINT_LIMIT) table = hashtable.Int64HashTable(size_hint) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index a4648186477d6..3a7e3fdab5dca 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -687,7 +687,6 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): 2 NaN dtype: object """ - # Check whether repl is valid (GH 13438, GH 15055) if not (isinstance(repl, str) or callable(repl)): raise TypeError("repl must be a string or callable") @@ -1085,7 +1084,6 @@ def str_extractall(arr, pat, flags=0): B 0 b 1 C 0 NaN 1 """ - regex = re.compile(pat, flags=flags) # the regex must contain capture groups. if regex.groups == 0: @@ -1358,7 +1356,6 @@ def str_find(arr, sub, start=0, end=None, side="left"): Series or Index Indexes where substring is found. """ - if not isinstance(sub, str): msg = f"expected a string object, not {type(sub).__name__}" raise TypeError(msg) @@ -1930,7 +1927,6 @@ def forbid_nonstring_types(forbidden, name=None): TypeError If the inferred type of the underlying data is in `forbidden`. 
""" - # deal with None forbidden = [] if forbidden is None else forbidden diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 3f0cfce39f6f9..1d933cf431b4b 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -111,7 +111,6 @@ def to_timedelta(arg, unit="ns", errors="raise"): def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"): """Convert string 'r' to a timedelta object.""" - try: result = Timedelta(r, unit) except ValueError: @@ -128,7 +127,6 @@ def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"): def _convert_listlike(arg, unit="ns", errors="raise", name=None): """Convert a list of objects to a timedelta index object.""" - if isinstance(arg, (list, tuple)) or not hasattr(arg, "dtype"): # This is needed only to ensure that in the case where we end up # returning arg (errors == "ignore"), and where the input is a diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 3366f10b92604..160d328ec16ec 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -269,7 +269,6 @@ def hash_array( ------- 1d uint64 numpy array of hash values, same length as the vals """ - if not hasattr(vals, "dtype"): raise TypeError("must pass a ndarray-like") dtype = vals.dtype @@ -340,7 +339,6 @@ def _hash_scalar( ------- 1d uint64 numpy array of hash value, of length 1 """ - if isna(val): # this is to be consistent with the _hash_categorical implementation return np.array([np.iinfo(np.uint64).max], dtype="u8") diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index 127957943d2ff..d6e8194c861fa 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -110,7 +110,6 @@ def generate_numba_apply_func( ------- Numba function """ - if engine_kwargs is None: engine_kwargs = {} diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 5c18796deb07a..f29cd428b7bad 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -149,7 +149,6 @@ def _create_blocks(self): """ Split data into blocks & return conformed data. """ - obj = self._selected_obj # filter out the on from the object @@ -172,7 +171,6 @@ def _gotitem(self, key, ndim, subset=None): subset : object, default None subset to act on """ - # create a new object to prevent aliasing if subset is None: subset = self.obj @@ -238,7 +236,6 @@ def __repr__(self) -> str: """ Provide a nice str repr of our rolling object. """ - attrs_list = ( f"{attr_name}={getattr(self, attr_name)}" for attr_name in self._attributes @@ -284,7 +281,6 @@ def _wrap_result(self, result, block=None, obj=None): """ Wrap a single result. """ - if obj is None: obj = self._selected_obj index = obj.index @@ -310,7 +306,6 @@ def _wrap_results(self, results, blocks, obj, exclude=None) -> FrameOrSeries: obj : conformed data (may be resampled) exclude: list of columns to exclude, default to None """ - from pandas import Series, concat final = [] @@ -1021,7 +1016,6 @@ def _get_window( window : ndarray the window, weights """ - window = self.window if isinstance(window, (list, tuple, np.ndarray)): return com.asarray_tuplesafe(window).astype(float) diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py index 6d76d7de407b1..f4bd14ad5c679 100644 --- a/pandas/io/clipboard/__init__.py +++ b/pandas/io/clipboard/__init__.py @@ -500,7 +500,6 @@ def determine_clipboard(): Determine the OS/platform and set the copy() and paste() functions accordingly. 
""" - global Foundation, AppKit, qtpy, PyQt4, PyQt5 # Setup for the CYGWIN platform: diff --git a/pandas/io/common.py b/pandas/io/common.py index c4772895afd1e..beb6c9d97aff3 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -296,7 +296,6 @@ def infer_compression( ------ ValueError on invalid compression specified. """ - # No compression has been explicitly specified if compression is None: return None diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index be52523e486af..ab2d97e6026d1 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -51,7 +51,6 @@ def _convert_to_style(cls, style_dict): ---------- style_dict : style dictionary to convert """ - from openpyxl.style import Style xls_style = Style() @@ -92,7 +91,6 @@ def _convert_to_style_kwargs(cls, style_dict): value has been replaced with a native openpyxl style object of the appropriate class. """ - _style_key_map = {"borders": "border"} style_kwargs = {} @@ -128,7 +126,6 @@ def _convert_to_color(cls, color_spec): ------- color : openpyxl.styles.Color """ - from openpyxl.styles import Color if isinstance(color_spec, str): @@ -164,7 +161,6 @@ def _convert_to_font(cls, font_dict): ------- font : openpyxl.styles.Font """ - from openpyxl.styles import Font _font_key_map = { @@ -202,7 +198,6 @@ def _convert_to_stop(cls, stop_seq): ------- stop : list of openpyxl.styles.Color """ - return map(cls._convert_to_color, stop_seq) @classmethod @@ -230,7 +225,6 @@ def _convert_to_fill(cls, fill_dict): ------- fill : openpyxl.styles.Fill """ - from openpyxl.styles import PatternFill, GradientFill _pattern_fill_key_map = { @@ -286,7 +280,6 @@ def _convert_to_side(cls, side_spec): ------- side : openpyxl.styles.Side """ - from openpyxl.styles import Side _side_key_map = {"border_style": "style"} @@ -329,7 +322,6 @@ def _convert_to_border(cls, border_dict): ------- border : openpyxl.styles.Border """ - from openpyxl.styles import Border _border_key_map = {"diagonalup": "diagonalUp", "diagonaldown": "diagonalDown"} @@ -365,7 +357,6 @@ def _convert_to_alignment(cls, alignment_dict): ------- alignment : openpyxl.styles.Alignment """ - from openpyxl.styles import Alignment return Alignment(**alignment_dict) @@ -399,7 +390,6 @@ def _convert_to_protection(cls, protection_dict): Returns ------- """ - from openpyxl.styles import Protection return Protection(**protection_dict) diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index e7a132b73e076..16f800a6de2c9 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -60,7 +60,6 @@ def _parse_cell(cell_contents, cell_typ): """ converts the contents of the cell into a pandas appropriate object """ - if cell_typ == XL_CELL_DATE: # Use the newer xlrd datetime handling. diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py index 6d9ff9be5249a..85a1bb031f457 100644 --- a/pandas/io/excel/_xlsxwriter.py +++ b/pandas/io/excel/_xlsxwriter.py @@ -85,7 +85,6 @@ def convert(cls, style_dict, num_format_str=None): style_dict : style dictionary to convert num_format_str : optional number format string """ - # Create a XlsxWriter format object. props = {} @@ -191,7 +190,6 @@ def save(self): """ Save workbook to disk. 
""" - return self.book.close() def write_cells( diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 35a6870c1194b..55d534f975b68 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -932,7 +932,6 @@ def to_latex( """ Render a DataFrame to a LaTeX tabular/longtable environment output. """ - from pandas.io.formats.latex import LatexFormatter return LatexFormatter( @@ -1135,7 +1134,6 @@ def format_array( ------- List[str] """ - fmt_klass: Type[GenericArrayFormatter] if is_datetime64_dtype(values.dtype): fmt_klass = Datetime64Formatter @@ -1296,9 +1294,7 @@ def _value_formatter( float_format: Optional[float_format_type] = None, threshold: Optional[Union[float, int]] = None, ) -> Callable: - """Returns a function to be applied on each value to format it - """ - + """Returns a function to be applied on each value to format it""" # the float_format parameter supersedes self.float_format if float_format is None: float_format = self.float_format @@ -1346,7 +1342,6 @@ def get_result_as_array(self) -> np.ndarray: Returns the float values converted into strings using the parameters given at initialisation, as a numpy array """ - if self.formatter is not None: return np.array([self.formatter(x) for x in self.values]) @@ -1461,7 +1456,6 @@ def __init__( def _format_strings(self) -> List[str]: """ we by definition have DO NOT have a TZ """ - values = self.values if not isinstance(values, DatetimeIndex): @@ -1541,7 +1535,6 @@ def format_percentiles( >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999]) ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%'] """ - percentiles = np.asarray(percentiles) # It checks for np.NaN as well @@ -1642,7 +1635,6 @@ def _get_format_datetime64_from_values( values: Union[np.ndarray, DatetimeArray, DatetimeIndex], date_format: Optional[str] ) -> Optional[str]: """ given values and a date_format, return a string format """ - if isinstance(values, np.ndarray) and values.ndim > 1: # We don't actually care about the order of values, and DatetimeIndex # only accepts 1D values @@ -1657,7 +1649,6 @@ def _get_format_datetime64_from_values( class Datetime64TZFormatter(Datetime64Formatter): def _format_strings(self) -> List[str]: """ we by definition have a TZ """ - values = self.values.astype(object) is_dates_only = _is_dates_only(values) formatter = self.formatter or _get_format_datetime64( @@ -1698,7 +1689,6 @@ def _get_format_timedelta64( If box, then show the return in quotes """ - values_int = values.astype(np.int64) consider_values = values_int != iNaT @@ -1913,7 +1903,6 @@ def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> Non See also EngFormatter. """ - set_option("display.float_format", EngFormatter(accuracy, use_eng_prefix)) set_option("display.column_space", max(12, accuracy + 9)) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 8ab56437d5c05..935762598f78a 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -56,7 +56,6 @@ def write_result(self, buf: IO[str]) -> None: Render a DataFrame to a LaTeX tabular, longtable, or table/tabular environment output. 
""" - # string representation of the columns if len(self.frame.columns) == 0 or len(self.frame.index) == 0: info_line = "Empty {name}\nColumns: {col}\nIndex: {idx}".format( diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 565752e269d79..eca5a3fb18e60 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -802,7 +802,6 @@ def where( -------- Styler.applymap """ - if other is None: other = "" diff --git a/pandas/io/html.py b/pandas/io/html.py index c676bfb1f0c74..ee8e96b4b3344 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -395,7 +395,6 @@ def _parse_thead_tbody_tfoot(self, table_html): - Move rows from bottom of body to footer only if all elements inside row are """ - header_rows = self._parse_thead_tr(table_html) body_rows = self._parse_tbody_tr(table_html) footer_rows = self._parse_tfoot_tr(table_html) @@ -435,7 +434,6 @@ def _expand_colspan_rowspan(self, rows): Any cell with ``rowspan`` or ``colspan`` will have its contents copied to subsequent cells. """ - all_texts = [] # list of rows, each a list of str remainder = [] # list of (index, text, nrows) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 04fd17a00041b..39ee097bc743b 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -266,7 +266,6 @@ def __init__( to know what the index is, forces orient to records, and forces date_format to 'iso'. """ - super().__init__( obj, orient, @@ -572,7 +571,6 @@ def read_json( "data": [{"index": "row 1", "col 1": "a", "col 2": "b"}, {"index": "row 2", "col 1": "c", "col 2": "d"}]}' """ - if orient == "table" and dtype: raise ValueError("cannot pass both dtype and orient='table'") if orient == "table" and convert_axes: @@ -886,7 +884,6 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True): """ Try to parse a ndarray like into a column by inferring dtype. """ - # don't try to coerce, unless a force conversion if use_dtypes: if not self.dtype: @@ -963,7 +960,6 @@ def _try_convert_to_date(self, data): Try to coerce object in epoch/iso formats and integer/float in epoch formats. Return a boolean if parsing was successful. """ - # no conversion on empty if not len(data): return data, False @@ -1117,7 +1113,6 @@ def _process_converter(self, f, filt=None): """ Take a conversion function and possibly recreate the frame. """ - if filt is None: filt = lambda col, c: True diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index b638bdc0bc1eb..08dca6b573a2f 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -18,7 +18,6 @@ def convert_to_line_delimits(s): """ Helper function that converts JSON lists to line delimited JSON. 
""" - # Determine we have a JSON list to turn to lines otherwise just return the # json object, only lists can if not s[0] == "[" and s[-1] == "]": diff --git a/pandas/io/orc.py b/pandas/io/orc.py index bbefe447cb7fe..ea79efd0579e5 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -42,7 +42,6 @@ def read_orc( ------- DataFrame """ - # we require a newer version of pyarrow than we support for parquet import pyarrow diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 926635062d853..9ae9729fc05ee 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -13,7 +13,6 @@ def get_engine(engine: str) -> "BaseImpl": """ return our implementation """ - if engine == "auto": engine = get_option("io.parquet.engine") @@ -297,6 +296,5 @@ def read_parquet(path, engine: str = "auto", columns=None, **kwargs): ------- DataFrame """ - impl = get_engine(engine) return impl.read(path, columns=columns, **kwargs) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 0173a25b0ec16..d7eb69d3a6048 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -407,7 +407,6 @@ def _validate_names(names): ValueError If names are not unique. """ - if names is not None: if len(names) != len(set(names)): raise ValueError("Duplicate names are not allowed.") @@ -760,7 +759,6 @@ def read_fwf( -------- >>> pd.read_fwf('data.csv') # doctest: +SKIP """ - # Check input arguments. if colspecs is None and widths is None: raise ValueError("Must specify either colspecs or widths") @@ -1252,7 +1250,6 @@ def _validate_skipfooter_arg(skipfooter): ------ ValueError : 'skipfooter' was not a non-negative integer. """ - if not is_integer(skipfooter): raise ValueError("skipfooter must be an integer") @@ -1795,7 +1792,6 @@ def _cast_types(self, values, cast_type, column): ------- converted : ndarray """ - if is_categorical_dtype(cast_type): known_cats = ( isinstance(cast_type, CategoricalDtype) @@ -2863,7 +2859,6 @@ def _alert_malformed(self, msg, row_num): Because this row number is displayed, we 1-index, even though we 0-index internally. """ - if self.error_bad_lines: raise ParserError(msg) elif self.warn_bad_lines: @@ -2882,7 +2877,6 @@ def _next_iter_line(self, row_num): ---------- row_num : The row number of the line being parsed. """ - try: return next(self.data) except csv.Error as e: @@ -2942,7 +2936,6 @@ def _remove_empty_lines(self, lines): filtered_lines : array-like The same array of lines with the "empty" ones removed. """ - ret = [] for l in lines: # Remove empty lines and lines with only one whitespace value @@ -3514,7 +3507,6 @@ def _get_na_values(col, na_values, na_fvalues, keep_default_na): 1) na_values : the string NaN values for that column. 2) na_fvalues : the float NaN values for that column. 
""" - if isinstance(na_values, dict): if col in na_values: return na_values[col], na_fvalues[col] diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index ccd5814287173..a75819d33d967 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -114,7 +114,6 @@ def _ensure_term(where, scope_level: int): that are passed create the terms here with a frame_level=2 (we are 2 levels down) """ - # only consider list/tuple here as an ndarray is automatically a coordinate # list level = scope_level + 1 @@ -246,7 +245,6 @@ def to_hdf( encoding: str = "UTF-8", ): """ store this object, close it if we opened it """ - if append: f = lambda store: store.append( key, @@ -362,7 +360,6 @@ def read_hdf( >>> df.to_hdf('./store.h5', 'data') >>> reread = pd.read_hdf('./store.h5') """ - if mode not in ["r", "r+", "a"]: raise ValueError( f"mode {mode} is not allowed while performing a read. " @@ -903,7 +900,6 @@ def select_as_multiple( raises TypeError if keys is not a list or tuple raises ValueError if the tables are not ALL THE SAME DIMENSIONS """ - # default to single select where = _ensure_term(where, scope_level=1) if isinstance(keys, (list, tuple)) and len(keys) == 1: @@ -1303,7 +1299,6 @@ def create_table_index( ------ TypeError: raises if the node is not a table """ - # version requirements _tables() s = self.get_storer(key) @@ -1523,7 +1518,6 @@ def _check_if_open(self): def _validate_format(self, format: str) -> str: """ validate / deprecate formats """ - # validate try: format = _FORMAT_MAP[format.lower()] @@ -1541,7 +1535,6 @@ def _create_storer( errors: str = "strict", ) -> Union["GenericFixed", "Table"]: """ return a suitable class to operate """ - cls: Union[Type["GenericFixed"], Type["Table"]] if value is not None and not isinstance(value, (Series, DataFrame)): @@ -2027,7 +2020,6 @@ def validate_and_set(self, handler: "AppendableTable", append: bool): def validate_col(self, itemsize=None): """ validate this column: return the compared against itemsize """ - # validate this column for string truncation (or reset to the max size) if _ensure_decoded(self.kind) == "string": c = self.col @@ -2059,7 +2051,6 @@ def update_info(self, info): set/update the info for this indexable with the key/value if there is a conflict raise/warn as needed """ - for key in self._info_fields: value = getattr(self, key, None) @@ -2242,7 +2233,6 @@ def _get_atom(cls, values: Union[np.ndarray, ABCExtensionArray]) -> "Col": """ Get an appropriately typed and shaped pytables.Col object for values. """ - dtype = values.dtype itemsize = dtype.itemsize @@ -2608,7 +2598,6 @@ def infer_axes(self): infer the axes of my storer return a boolean indicating if we have a valid storer or not """ - s = self.storable if s is None: return False @@ -2894,7 +2883,6 @@ def read_index_node( def write_array_empty(self, key: str, value: ArrayLike): """ write a 0-len array """ - # ugly hack for length 0 axes arr = np.empty((1,) * value.ndim) self._handle.create_array(self.group, key, arr) @@ -3303,7 +3291,6 @@ def data_orientation(self): def queryables(self) -> Dict[str, Any]: """ return a dict of the kinds allowable columns for this object """ - # mypy doesn't recognize DataFrame._AXIS_NAMES, so we re-write it here axis_names = {0: "index", 1: "columns"} @@ -3512,7 +3499,6 @@ def create_index(self, columns=None, optlevel=None, kind: Optional[str] = None): Cannot index Time64Col or ComplexCol. Pytables must be >= 3.0. 
""" - if not self.infer_axes(): return if columns is False: @@ -3579,7 +3565,6 @@ def _read_axes( ------- List[Tuple[index_values, column_values]] """ - # create the selection selection = Selection(self, where=where, start=start, stop=stop) values = selection.select() @@ -3607,7 +3592,6 @@ def validate_data_columns(self, data_columns, min_itemsize, non_index_axes): """take the input data_columns and min_itemize and create a data columns spec """ - if not len(non_index_axes): return [] @@ -3674,7 +3658,6 @@ def _create_axes( min_itemsize: Dict[str, int] or None, default None The min itemsize for a column in bytes. """ - if not isinstance(obj, DataFrame): group = self.group._v_name raise TypeError( @@ -3928,7 +3911,6 @@ def get_blk_items(mgr, blocks): def process_axes(self, obj, selection: "Selection", columns=None): """ process axes filters """ - # make a copy to avoid side effects if columns is not None: columns = list(columns) @@ -3993,7 +3975,6 @@ def create_description( expectedrows: Optional[int], ) -> Dict[str, Any]: """ create the description of the table from the axes & values """ - # provided expected rows if its passed if expectedrows is None: expectedrows = max(self.nrows_expected, 10000) @@ -4023,7 +4004,6 @@ def read_coordinates( """select coordinates (row numbers) from a table; return the coordinates object """ - # validate the version self.validate_version(where) @@ -4053,7 +4033,6 @@ def read_column( """return a single column from the table, generally only indexables are interesting """ - # validate the version self.validate_version() @@ -4185,7 +4164,6 @@ def write_data(self, chunksize: Optional[int], dropna: bool = False): """ we form the data into a 2-d including indexes,values,mask write chunk-by-chunk """ - names = self.dtype.names nrows = self.nrows_expected @@ -4258,7 +4236,6 @@ def write_data_chunk( mask : an array of the masks values : an array of the values """ - # 0 len for v in values: if not np.prod(v.shape): @@ -4853,7 +4830,6 @@ def _convert_string_array(data: np.ndarray, encoding: str, errors: str) -> np.nd ------- np.ndarray[fixed-length-string] """ - # encode if needed if len(data): data = ( diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 3cf7fd885e564..461d393dc4521 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -198,7 +198,6 @@ def _parse_float_vec(vec): Parse a vector of float values representing IBM 8 byte floats into native 8 byte floats. """ - dtype = np.dtype(">u4,>u4") vec1 = vec.view(dtype=dtype) xport1 = vec1["f0"] @@ -411,7 +410,6 @@ def _record_count(self) -> int: Side effect: returns file position to record_start. 
""" - self.filepath_or_buffer.seek(0, 2) total_records_length = self.filepath_or_buffer.tell() - self.record_start diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 36291b2faeed0..69e5a973ff706 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -120,7 +120,6 @@ def _parse_date_columns(data_frame, parse_dates): def _wrap_result(data, columns, index_col=None, coerce_float=True, parse_dates=None): """Wrap result set of query in a DataFrame.""" - frame = DataFrame.from_records(data, columns=columns, coerce_float=coerce_float) frame = _parse_date_columns(frame, parse_dates) @@ -228,7 +227,6 @@ def read_sql_table( -------- >>> pd.read_sql_table('table_name', 'postgres:///db_name') # doctest:+SKIP """ - con = _engine_builder(con) if not _is_sqlalchemy_connectable(con): raise NotImplementedError( @@ -758,7 +756,6 @@ def _query_iterator( self, result, chunksize, columns, coerce_float=True, parse_dates=None ): """Return generator through chunked result set.""" - while True: data = result.fetchmany(chunksize) if not data: @@ -1149,7 +1146,6 @@ def _query_iterator( result, chunksize, columns, index_col=None, coerce_float=True, parse_dates=None ): """Return generator through chunked result set""" - while True: data = result.fetchmany(chunksize) if not data: @@ -1606,7 +1602,6 @@ def _query_iterator( cursor, chunksize, columns, index_col=None, coerce_float=True, parse_dates=None ): """Return generator through chunked result set""" - while True: data = cursor.fetchmany(chunksize) if type(data) == tuple: @@ -1781,6 +1776,5 @@ def get_schema(frame, name, keys=None, con=None, dtype=None): be a SQLAlchemy type, or a string for sqlite3 fallback connection. """ - pandas_sql = pandasSQL_builder(con=con) return pandas_sql._create_sql_schema(frame, name, keys=keys, dtype=dtype) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 4e1fcb97e7891..cf3251faae979 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -2131,7 +2131,6 @@ def _prepare_categoricals(self, data: DataFrame) -> DataFrame: """Check for categorical columns, retain categorical information for Stata file and convert categorical data to int """ - is_cat = [is_categorical_dtype(data[col]) for col in data] self._is_col_cat = is_cat self._value_labels: List[StataValueLabel] = [] @@ -2771,7 +2770,6 @@ def generate_table(self) -> Tuple[Dict[str, Tuple[int, int]], DataFrame]: * 118: 6 * 119: 5 """ - gso_table = self._gso_table gso_df = self.df columns = list(gso_df.columns) diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index 770f89324badb..c399e5b9b7017 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -982,7 +982,6 @@ def __init__( def _get_default_locs(self, vmin, vmax): """Returns the default locations of ticks.""" - if self.plot_obj.date_axis_info is None: self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) @@ -1063,7 +1062,6 @@ def __init__(self, freq, minor_locator=False, dynamic_mode=True, plot_obj=None): def _set_default_format(self, vmin, vmax): """Returns the default ticks spacing.""" - if self.plot_obj.date_axis_info is None: self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) info = self.plot_obj.date_axis_info diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 3f47d325d86ef..63d0b8abe59d9 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -432,7 +432,6 @@ def _add_table(self): def 
_post_plot_logic_common(self, ax, data): """Common post process for each axes""" - if self.orientation == "vertical" or self.orientation is None: self._apply_axis_properties(ax.xaxis, rot=self.rot, fontsize=self.fontsize) self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize) @@ -515,7 +514,6 @@ def _apply_axis_properties(self, axis, rot=None, fontsize=None): multiple times per draw. It's therefore beneficial for us to avoid accessing unless we will act on the Tick. """ - if rot is not None or fontsize is not None: # rot=0 is a valid setting, hence the explicit None check labels = axis.get_majorticklabels() + axis.get_minorticklabels() @@ -756,7 +754,6 @@ def _parse_errorbars(self, label, err): key in the plotted DataFrame str: the name of the column within the plotted DataFrame """ - if err is None: return None diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 4c917b9bb42d2..8da2797835080 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -156,7 +156,6 @@ def get_is_dtype_funcs(): begin with 'is_' and end with 'dtype' """ - fnames = [f for f in dir(com) if (f.startswith("is_") and f.endswith("dtype"))] return [getattr(com, fname) for fname in fnames] diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 25b2997eb088f..54662c94baa05 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -60,7 +60,6 @@ def assert_stat_op_calc( skipna_alternative : function, default None NaN-safe version of alternative """ - f = getattr(frame, opname) if check_dates: @@ -150,7 +149,6 @@ def assert_stat_op_api(opname, float_frame, float_string_frame, has_numeric_only has_numeric_only : bool, default False Whether the method "opname" has the kwarg "numeric_only" """ - # make sure works on mixed-type frame getattr(float_string_frame, opname)(axis=0) getattr(float_string_frame, opname)(axis=1) @@ -178,7 +176,6 @@ def assert_bool_op_calc(opname, alternative, frame, has_skipna=True): has_skipna : bool, default True Whether the method "opname" has the kwarg "skip_na" """ - f = getattr(frame, opname) if has_skipna: diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 1f4fd90d9b059..02d803795e79c 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -28,7 +28,6 @@ def _construct(self, shape, value=None, dtype=None, **kwargs): if value is specified use that if its a scalar if value is an array, repeat it as needed """ - if isinstance(shape, int): shape = tuple([shape] * self._ndim) if value is not None: diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 8967ef06f50fb..0ad829dd4de7a 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -520,7 +520,6 @@ def _check_cython_group_transform_cumulative(pd_op, np_op, dtype): dtype : type The specified dtype of the data. 
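The user-visible behavior being exercised here: cumulative transforms restart within each group. For example:

import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b", "b"], "val": [1, 2, 3, 4]})
out = df.groupby("key")["val"].cumsum()
assert out.tolist() == [1, 3, 3, 7]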
""" - is_datetimelike = False data = np.array([[1], [2], [3], [4]], dtype=dtype) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index cd8e8c3542cce..7574e4501f5aa 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -791,7 +791,6 @@ def test_dti_tz_constructors(self, tzstr): """ Test different DatetimeIndex constructions with timezone Follow-up of GH#4229 """ - arr = ["11/10/2005 08:00:00", "11/10/2005 09:00:00"] idx1 = to_datetime(arr).tz_localize(tzstr) diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 0e5d1d45ad6db..24616f05c19ce 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -704,7 +704,6 @@ def test_len_specialised(self, step): ) def appends(self, request): """Inputs and expected outputs for RangeIndex.append test""" - return request.param def test_append(self, appends): diff --git a/pandas/tests/indexes/ranges/test_setops.py b/pandas/tests/indexes/ranges/test_setops.py index 5bedc4089feba..8e749e0752087 100644 --- a/pandas/tests/indexes/ranges/test_setops.py +++ b/pandas/tests/indexes/ranges/test_setops.py @@ -225,7 +225,6 @@ def test_union_noncomparable(self, sort): ) def unions(self, request): """Inputs and expected outputs for RangeIndex.union tests""" - return request.param def test_union_sorted(self, unions): diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index 6f6981a30d7e4..9d55609d5db00 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -105,7 +105,6 @@ def generate_indices(self, f, values=False): if values is True , use the axis values is False, use the range """ - axes = f.axes if values: axes = (list(range(len(ax))) for ax in axes) @@ -114,7 +113,6 @@ def generate_indices(self, f, values=False): def get_value(self, name, f, i, values=False): """ return the value for the location i """ - # check against values if values: return f.values[i] @@ -150,7 +148,6 @@ def check_result( ): def _eq(axis, obj, key): """ compare equal for these 2 keys """ - axified = _axify(obj, key, axis) try: getattr(obj, method).__getitem__(axified) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index b3f6d65da5db5..f783c3516e357 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -505,7 +505,6 @@ def test_integer_positional_indexing(self): """ make sure that we are raising on positional indexing w.r.t. 
an integer index """ - s = Series(range(2, 6), index=range(2, 6)) result = s[2:4] diff --git a/pandas/tests/io/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py index 67b767a337a89..f7583c93b9288 100755 --- a/pandas/tests/io/generate_legacy_storage_files.py +++ b/pandas/tests/io/generate_legacy_storage_files.py @@ -136,7 +136,6 @@ def _create_sp_frame(): def create_data(): """ create the pickle data """ - data = { "A": [0.0, 1.0, 2.0, 3.0, np.nan], "B": [0, 1, 0, 1, 0], diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index bedd60084124c..e966db7a1cc71 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -33,7 +33,6 @@ def _clean_dict(d): ------- cleaned_dict : dict """ - return {str(k): v for k, v in d.items()} diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 652cacaf14ffb..3458cfb6ad254 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -114,7 +114,6 @@ def mock_clipboard(monkeypatch, request): This returns the local dictionary, for direct manipulation by tests. """ - # our local clipboard for tests _mock_data = {} diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index f76db9939641c..cfcf617cedf9c 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -151,7 +151,6 @@ def check_round_trip( repeat: int, optional How many times to repeat the test """ - write_kwargs = write_kwargs or {"compression": None} read_kwargs = read_kwargs or {} diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index a604d90acc854..ea0ec8ad98ffe 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -92,7 +92,6 @@ def _check_legend_labels(self, axes, labels=None, visible=True): expected legend visibility. 
labels are checked only when visible is True """ - if visible and (labels is None): raise ValueError("labels must be specified when visible is True") axes = self._flatten_visible(axes) @@ -190,7 +189,6 @@ def _check_colors( Series used for color grouping key used for andrew_curves, parallel_coordinates, radviz test """ - from matplotlib.lines import Line2D from matplotlib.collections import Collection, PolyCollection, LineCollection diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 3aa7765954634..bf998a6e83909 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -119,7 +119,6 @@ def test_aaa_group_order(): def test_aggregate_normal(resample_method): """Check TimeGrouper's aggregation is identical as normal groupby.""" - if resample_method == "ohlc": pytest.xfail(reason="DataError: No numeric types to aggregate") diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 8037095aff0b9..9b5dea7663396 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -35,7 +35,6 @@ def setup_method(self, datapath): def test_examples1(self): """ doc-string examples """ - left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]}) @@ -48,7 +47,6 @@ def test_examples1(self): def test_examples2(self): """ doc-string examples """ - trades = pd.DataFrame( { "time": pd.to_datetime( diff --git a/pandas/tests/reshape/merge/test_merge_index_as_string.py b/pandas/tests/reshape/merge/test_merge_index_as_string.py index 9075a4e791583..08614d04caf4b 100644 --- a/pandas/tests/reshape/merge/test_merge_index_as_string.py +++ b/pandas/tests/reshape/merge/test_merge_index_as_string.py @@ -82,7 +82,6 @@ def compute_expected(df_left, df_right, on=None, left_on=None, right_on=None, ho DataFrame The expected merge result """ - # Handle on param if specified if on is not None: left_on, right_on = on, on diff --git a/pandas/tests/window/moments/test_moments_rolling.py b/pandas/tests/window/moments/test_moments_rolling.py index 83e4ee25558b5..fd18d37ab13b6 100644 --- a/pandas/tests/window/moments/test_moments_rolling.py +++ b/pandas/tests/window/moments/test_moments_rolling.py @@ -1335,7 +1335,6 @@ def test_rolling_kurt_eq_value_fperr(self): def test_rolling_max_gh6297(self): """Replicate result expected in GH #6297""" - indices = [datetime(1975, 1, i) for i in range(1, 6)] # So that we can have 2 datapoints on one of the days indices.append(datetime(1975, 1, 3, 6, 0)) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index e05cce9c49f4b..df1e750b32138 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -308,7 +308,6 @@ def apply_index(self, i): ------- y : DatetimeIndex """ - if type(self) is not DateOffset: raise NotImplementedError( f"DateOffset subclass {type(self).__name__} " diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 05f73a126feca..d854be062fcbb 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -55,7 +55,6 @@ def deprecate( The message to display in the warning. Default is '{name} is deprecated. Use {alt_name} instead.' 
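A minimal sketch of the wrapper this builds (simplified signature; the real helper takes more parameters and also assembles a deprecation docstring):

import functools
import warnings

def deprecate(name, alternative, alt_name=None, klass=None, msg=None):
    alt_name = alt_name or alternative.__name__
    klass = klass or FutureWarning
    warning_msg = msg or f"{name} is deprecated, use {alt_name} instead"

    @functools.wraps(alternative)
    def wrapper(*args, **kwargs):
        warnings.warn(warning_msg, klass, stacklevel=2)
        return alternative(*args, **kwargs)

    return wrapper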
""" - alt_name = alt_name or alternative.__name__ klass = klass or FutureWarning warning_msg = msg or f"{name} is deprecated, use {alt_name} instead" @@ -163,7 +162,6 @@ def deprecate_kwarg( future version please takes steps to stop use of 'cols' should raise warning """ - if mapping is not None and not hasattr(mapping, "get") and not callable(mapping): raise TypeError( "mapping from old to new argument values must be dict or callable!" From a4d743ed9d5212d043c3d21a1155a91765ca47f7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 12 Feb 2020 08:19:12 -0800 Subject: [PATCH 011/388] ENH: support datetime64, datetime64tz in nanops.mean, nanops.median (#29941) --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/frame.py | 21 ++++++++++++++++++++- pandas/core/nanops.py | 17 +++++++++++------ pandas/tests/frame/test_analytics.py | 18 +++++++++++++----- pandas/tests/test_nanops.py | 5 ++--- 5 files changed, 47 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 8d7f817b83946..13827e8fc4c33 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -74,6 +74,7 @@ Backwards incompatible API changes Deprecations ~~~~~~~~~~~~ - Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated, will raise in a future version. Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`) +- :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`) - - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b7bec7f87e6e1..da152b70abd2e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7957,6 +7957,19 @@ def _count_level(self, level, axis=0, numeric_only=False): def _reduce( self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds ): + + dtype_is_dt = self.dtypes.apply(lambda x: x.kind == "M") + if numeric_only is None and name in ["mean", "median"] and dtype_is_dt.any(): + warnings.warn( + "DataFrame.mean and DataFrame.median with numeric_only=None " + "will include datetime64 and datetime64tz columns in a " + "future version.", + FutureWarning, + stacklevel=3, + ) + cols = self.columns[~dtype_is_dt] + self = self[cols] + if axis is None and filter_type == "bool": labels = None constructor = None @@ -7996,9 +8009,15 @@ def _get_data(axis_matters): out_dtype = "bool" if filter_type == "bool" else None + def blk_func(values): + if values.ndim == 1 and not isinstance(values, np.ndarray): + # we can't pass axis=1 + return op(values, axis=0, skipna=skipna, **kwds) + return op(values, axis=1, skipna=skipna, **kwds) + # After possibly _get_data and transposing, we are now in the # simple case where we can use BlockManager._reduce - res = df._data.reduce(op, axis=1, skipna=skipna, **kwds) + res = df._data.reduce(blk_func) assert isinstance(res, dict) if len(res): assert len(res) == max(list(res.keys())) + 1, res.keys() diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 422afd061762b..6115c4af41b25 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -30,7 +30,6 @@ is_timedelta64_dtype, pandas_dtype, ) -from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna bn = import_optional_dependency("bottleneck", raise_on_missing=False, on_version="warn") @@ -516,7 +515,6 @@ def nansum( return 
_wrap_results(the_sum, dtype) -@disallow("M8", DatetimeTZDtype) @bottleneck_switch() def nanmean(values, axis=None, skipna=True, mask=None): """ @@ -574,7 +572,6 @@ def nanmean(values, axis=None, skipna=True, mask=None): return _wrap_results(the_mean, dtype) -@disallow("M8") @bottleneck_switch() def nanmedian(values, axis=None, skipna=True, mask=None): """ @@ -607,8 +604,12 @@ def get_median(x): return np.nanmedian(x[mask]) values, mask, dtype, dtype_max, _ = _get_values(values, skipna, mask=mask) - if not is_float_dtype(values): - values = values.astype("f8") + if not is_float_dtype(values.dtype): + try: + values = values.astype("f8") + except ValueError: + # e.g. "could not convert string to float: 'a'" + raise TypeError if mask is not None: values[mask] = np.nan @@ -1355,7 +1356,11 @@ def _ensure_numeric(x): try: x = x.astype(np.complex128) except (TypeError, ValueError): - x = x.astype(np.float64) + try: + x = x.astype(np.float64) + except ValueError: + # GH#29941 we get here with object arrays containing strs + raise TypeError(f"Could not convert {x} to numeric") else: if not np.any(np.imag(x)): x = x.real diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 54662c94baa05..61802956addeb 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -63,12 +63,15 @@ def assert_stat_op_calc( f = getattr(frame, opname) if check_dates: + expected_warning = FutureWarning if opname in ["mean", "median"] else None df = DataFrame({"b": date_range("1/1/2001", periods=2)}) - result = getattr(df, opname)() + with tm.assert_produces_warning(expected_warning): + result = getattr(df, opname)() assert isinstance(result, Series) df["a"] = range(len(df)) - result = getattr(df, opname)() + with tm.assert_produces_warning(expected_warning): + result = getattr(df, opname)() assert isinstance(result, Series) assert len(result) @@ -457,7 +460,8 @@ def test_nunique(self): def test_mean_mixed_datetime_numeric(self, tz): # https://github.com/pandas-dev/pandas/issues/24752 df = pd.DataFrame({"A": [1, 1], "B": [pd.Timestamp("2000", tz=tz)] * 2}) - result = df.mean() + with tm.assert_produces_warning(FutureWarning): + result = df.mean() expected = pd.Series([1.0], index=["A"]) tm.assert_series_equal(result, expected) @@ -467,7 +471,9 @@ def test_mean_excludes_datetimes(self, tz): # Our long-term desired behavior is unclear, but the behavior in # 0.24.0rc1 was buggy. 
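# Summary of the behavior under test: with numeric_only=None,
# DataFrame.mean/median currently drop datetime64/datetime64tz columns
# and emit a FutureWarning (a future version will include them), so the
# tz-only frame below reduces to an empty float64 Series.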
df = pd.DataFrame({"A": [pd.Timestamp("2000", tz=tz)] * 2}) - result = df.mean() + with tm.assert_produces_warning(FutureWarning): + result = df.mean() + expected = pd.Series(dtype=np.float64) tm.assert_series_equal(result, expected) @@ -863,7 +869,9 @@ def test_mean_datetimelike(self): expected = pd.Series({"A": 1.0}) tm.assert_series_equal(result, expected) - result = df.mean() + with tm.assert_produces_warning(FutureWarning): + # in the future datetime columns will be included + result = df.mean() expected = pd.Series({"A": 1.0, "C": df.loc[1, "C"]}) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 2c5d028ebe42e..f7e652eb78e2d 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -750,8 +750,8 @@ def test_ndarray(self): # Test non-convertible string ndarray s_values = np.array(["foo", "bar", "baz"], dtype=object) - msg = r"could not convert string to float: '(foo|baz)'" - with pytest.raises(ValueError, match=msg): + msg = r"Could not convert .* to numeric" + with pytest.raises(TypeError, match=msg): nanops._ensure_numeric(s_values) def test_convertable_values(self): @@ -993,7 +993,6 @@ def prng(self): class TestDatetime64NaNOps: @pytest.mark.parametrize("tz", [None, "UTC"]) - @pytest.mark.xfail(reason="disabled") # Enabling mean changes the behavior of DataFrame.mean # See https://github.com/pandas-dev/pandas/issues/24752 def test_nanmean(self, tz): From ee8b856dd23061834e75ba1e4f3e414497d2364f Mon Sep 17 00:00:00 2001 From: 3vts <3vts@users.noreply.github.com> Date: Wed, 12 Feb 2020 11:18:49 -0600 Subject: [PATCH 012/388] CLN: 29547 replace old string formatting 1 (#31914) --- .../tests/extension/decimal/test_decimal.py | 2 +- .../tests/frame/indexing/test_categorical.py | 6 ++-- pandas/tests/frame/methods/test_describe.py | 2 +- pandas/tests/frame/methods/test_duplicated.py | 4 +-- pandas/tests/frame/methods/test_to_dict.py | 4 +-- pandas/tests/frame/test_alter_axes.py | 3 +- pandas/tests/frame/test_api.py | 8 +++--- pandas/tests/frame/test_constructors.py | 2 +- pandas/tests/frame/test_dtypes.py | 28 +++++++++---------- pandas/tests/frame/test_join.py | 2 +- 10 files changed, 29 insertions(+), 32 deletions(-) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index bd9b77a2bc419..a78e4bb34e42a 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -99,7 +99,7 @@ def assert_frame_equal(cls, left, right, *args, **kwargs): check_names=kwargs.get("check_names", True), check_exact=kwargs.get("check_exact", False), check_categorical=kwargs.get("check_categorical", True), - obj="{obj}.columns".format(obj=kwargs.get("obj", "DataFrame")), + obj=f"{kwargs.get('obj', 'DataFrame')}.columns", ) decimals = (left.dtypes == "decimal").index diff --git a/pandas/tests/frame/indexing/test_categorical.py b/pandas/tests/frame/indexing/test_categorical.py index a29c193676db2..3a472a8b58b6c 100644 --- a/pandas/tests/frame/indexing/test_categorical.py +++ b/pandas/tests/frame/indexing/test_categorical.py @@ -14,9 +14,7 @@ def test_assignment(self): df = DataFrame( {"value": np.array(np.random.randint(0, 10000, 100), dtype="int32")} ) - labels = Categorical( - ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] - ) + labels = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) df = df.sort_values(by=["value"], ascending=True) s = pd.cut(df.value, range(0, 10500, 500), 
right=False, labels=labels) @@ -348,7 +346,7 @@ def test_assigning_ops(self): def test_functions_no_warnings(self): df = DataFrame({"value": np.random.randint(0, 100, 20)}) - labels = ["{0} - {1}".format(i, i + 9) for i in range(0, 100, 10)] + labels = [f"{i} - {i + 9}" for i in range(0, 100, 10)] with tm.assert_produces_warning(False): df["group"] = pd.cut( df.value, range(0, 105, 10), right=False, labels=labels diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py index 127233ed2713e..8a75e80a12f52 100644 --- a/pandas/tests/frame/methods/test_describe.py +++ b/pandas/tests/frame/methods/test_describe.py @@ -86,7 +86,7 @@ def test_describe_bool_frame(self): def test_describe_categorical(self): df = DataFrame({"value": np.random.randint(0, 10000, 100)}) - labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] + labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)] cat_labels = Categorical(labels, labels) df = df.sort_values(by=["value"], ascending=True) diff --git a/pandas/tests/frame/methods/test_duplicated.py b/pandas/tests/frame/methods/test_duplicated.py index 72eec8753315c..38b9d7fd049ab 100644 --- a/pandas/tests/frame/methods/test_duplicated.py +++ b/pandas/tests/frame/methods/test_duplicated.py @@ -22,9 +22,7 @@ def test_duplicated_do_not_fail_on_wide_dataframes(): # gh-21524 # Given the wide dataframe with a lot of columns # with different (important!) values - data = { - "col_{0:02d}".format(i): np.random.randint(0, 1000, 30000) for i in range(100) - } + data = {f"col_{i:02d}": np.random.randint(0, 1000, 30000) for i in range(100)} df = DataFrame(data).T result = df.duplicated() diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index 7b0adceb57668..40393721c4ac6 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -236,9 +236,9 @@ def test_to_dict_numeric_names(self): def test_to_dict_wide(self): # GH#24939 - df = DataFrame({("A_{:d}".format(i)): [i] for i in range(256)}) + df = DataFrame({(f"A_{i:d}"): [i] for i in range(256)}) result = df.to_dict("records")[0] - expected = {"A_{:d}".format(i): i for i in range(256)} + expected = {f"A_{i:d}": i for i in range(256)} assert result == expected def test_to_dict_orient_dtype(self): diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 602ea9ca0471a..0c19a38bb5fa2 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -382,8 +382,9 @@ class Thing(frozenset): # need to stabilize repr for KeyError (due to random order in sets) def __repr__(self) -> str: tmp = sorted(self) + joined_reprs = ", ".join(map(repr, tmp)) # double curly brace prints one brace in format string - return "frozenset({{{}}})".format(", ".join(map(repr, tmp))) + return f"frozenset({{{joined_reprs}}})" thing1 = Thing(["One", "red"]) thing2 = Thing(["Two", "blue"]) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 17cc50661e3cb..a021dd91a7d26 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -46,19 +46,19 @@ def test_get_value(self, float_frame): def test_add_prefix_suffix(self, float_frame): with_prefix = float_frame.add_prefix("foo#") - expected = pd.Index(["foo#{c}".format(c=c) for c in float_frame.columns]) + expected = pd.Index([f"foo#{c}" for c in float_frame.columns]) tm.assert_index_equal(with_prefix.columns, expected) with_suffix = 
float_frame.add_suffix("#foo") - expected = pd.Index(["{c}#foo".format(c=c) for c in float_frame.columns]) + expected = pd.Index([f"{c}#foo" for c in float_frame.columns]) tm.assert_index_equal(with_suffix.columns, expected) with_pct_prefix = float_frame.add_prefix("%") - expected = pd.Index(["%{c}".format(c=c) for c in float_frame.columns]) + expected = pd.Index([f"%{c}" for c in float_frame.columns]) tm.assert_index_equal(with_pct_prefix.columns, expected) with_pct_suffix = float_frame.add_suffix("%") - expected = pd.Index(["{c}%".format(c=c) for c in float_frame.columns]) + expected = pd.Index([f"{c}%" for c in float_frame.columns]) tm.assert_index_equal(with_pct_suffix.columns, expected) def test_get_axis(self, float_frame): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 5f4c78449f71d..8c9b7cd060059 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -278,7 +278,7 @@ def test_constructor_ordereddict(self): nitems = 100 nums = list(range(nitems)) random.shuffle(nums) - expected = ["A{i:d}".format(i=i) for i in nums] + expected = [f"A{i:d}" for i in nums] df = DataFrame(OrderedDict(zip(expected, [[0]] * nitems))) assert expected == list(df.columns) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 966f0d416676c..8b63f0614eebf 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -702,7 +702,7 @@ def test_astype_categorical(self, dtype): @pytest.mark.parametrize("cls", [CategoricalDtype, DatetimeTZDtype, IntervalDtype]) def test_astype_categoricaldtype_class_raises(self, cls): df = DataFrame({"A": ["a", "a", "b", "c"]}) - xpr = "Expected an instance of {}".format(cls.__name__) + xpr = f"Expected an instance of {cls.__name__}" with pytest.raises(TypeError, match=xpr): df.astype({"A": cls}) @@ -827,7 +827,7 @@ def test_df_where_change_dtype(self): def test_astype_from_datetimelike_to_objectt(self, dtype, unit): # tests astype to object dtype # gh-19223 / gh-12425 - dtype = "{}[{}]".format(dtype, unit) + dtype = f"{dtype}[{unit}]" arr = np.array([[1, 2, 3]], dtype=dtype) df = DataFrame(arr) result = df.astype(object) @@ -844,7 +844,7 @@ def test_astype_from_datetimelike_to_objectt(self, dtype, unit): def test_astype_to_datetimelike_unit(self, arr_dtype, dtype, unit): # tests all units from numeric origination # gh-19223 / gh-12425 - dtype = "{}[{}]".format(dtype, unit) + dtype = f"{dtype}[{unit}]" arr = np.array([[1, 2, 3]], dtype=arr_dtype) df = DataFrame(arr) result = df.astype(dtype) @@ -856,7 +856,7 @@ def test_astype_to_datetimelike_unit(self, arr_dtype, dtype, unit): def test_astype_to_datetime_unit(self, unit): # tests all units from datetime origination # gh-19223 - dtype = "M8[{}]".format(unit) + dtype = f"M8[{unit}]" arr = np.array([[1, 2, 3]], dtype=dtype) df = DataFrame(arr) result = df.astype(dtype) @@ -868,7 +868,7 @@ def test_astype_to_datetime_unit(self, unit): def test_astype_to_timedelta_unit_ns(self, unit): # preserver the timedelta conversion # gh-19223 - dtype = "m8[{}]".format(unit) + dtype = f"m8[{unit}]" arr = np.array([[1, 2, 3]], dtype=dtype) df = DataFrame(arr) result = df.astype(dtype) @@ -880,7 +880,7 @@ def test_astype_to_timedelta_unit_ns(self, unit): def test_astype_to_timedelta_unit(self, unit): # coerce to float # gh-19223 - dtype = "m8[{}]".format(unit) + dtype = f"m8[{unit}]" arr = np.array([[1, 2, 3]], dtype=dtype) df = DataFrame(arr) result = df.astype(dtype) @@ -892,21 +892,21 
@@ def test_astype_to_timedelta_unit(self, unit): def test_astype_to_incorrect_datetimelike(self, unit): # trying to astype a m to a M, or vice-versa # gh-19224 - dtype = "M8[{}]".format(unit) - other = "m8[{}]".format(unit) + dtype = f"M8[{unit}]" + other = f"m8[{unit}]" df = DataFrame(np.array([[1, 2, 3]], dtype=dtype)) msg = ( - r"cannot astype a datetimelike from \[datetime64\[ns\]\] to " - r"\[timedelta64\[{}\]\]" - ).format(unit) + fr"cannot astype a datetimelike from \[datetime64\[ns\]\] to " + fr"\[timedelta64\[{unit}\]\]" + ) with pytest.raises(TypeError, match=msg): df.astype(other) msg = ( - r"cannot astype a timedelta from \[timedelta64\[ns\]\] to " - r"\[datetime64\[{}\]\]" - ).format(unit) + fr"cannot astype a timedelta from \[timedelta64\[ns\]\] to " + fr"\[datetime64\[{unit}\]\]" + ) df = DataFrame(np.array([[1, 2, 3]], dtype=other)) with pytest.raises(TypeError, match=msg): df.astype(dtype) diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py index c6e28f3c64f12..8c388a887158f 100644 --- a/pandas/tests/frame/test_join.py +++ b/pandas/tests/frame/test_join.py @@ -161,7 +161,7 @@ def test_join_overlap(float_frame): def test_join_period_index(frame_with_period_index): - other = frame_with_period_index.rename(columns=lambda x: "{key}{key}".format(key=x)) + other = frame_with_period_index.rename(columns=lambda key: f"{key}{key}") joined_values = np.concatenate([frame_with_period_index.values] * 2, axis=1) From 48cb5a9517d6b9e19cca306800453cc32e26cc5a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 12 Feb 2020 09:42:36 -0800 Subject: [PATCH 013/388] CLN: implement _getitem_tuple_same_dim (#31911) --- pandas/core/indexing.py | 49 ++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index f498e1696ea5b..b3777e949a08c 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -718,6 +718,25 @@ def _handle_lowerdim_multi_index_axis0(self, tup: Tuple): return None + def _getitem_tuple_same_dim(self, tup: Tuple): + """ + Index with indexers that should return an object of the same dimension + as self.obj. + + This is only called after a failed call to _getitem_lowerdim. + """ + retval = self.obj + for i, key in enumerate(tup): + if com.is_null_slice(key): + continue + + retval = getattr(retval, self.name)._getitem_axis(key, axis=i) + # We should never have retval.ndim < self.ndim, as that should + # be handled by the _getitem_lowerdim call above. 
+ assert retval.ndim == self.ndim + + return retval + def _getitem_lowerdim(self, tup: Tuple): # we can directly get the axis result since the axis is specified @@ -1049,15 +1068,7 @@ def _getitem_tuple(self, tup: Tuple): if self._multi_take_opportunity(tup): return self._multi_take(tup) - # no shortcut needed - retval = self.obj - for i, key in enumerate(tup): - if com.is_null_slice(key): - continue - - retval = getattr(retval, self.name)._getitem_axis(key, axis=i) - - return retval + return self._getitem_tuple_same_dim(tup) def _getitem_axis(self, key, axis: int): key = item_from_zerodim(key) @@ -1468,25 +1479,7 @@ def _getitem_tuple(self, tup: Tuple): except IndexingError: pass - retval = self.obj - axis = 0 - for i, key in enumerate(tup): - if com.is_null_slice(key): - axis += 1 - continue - - retval = getattr(retval, self.name)._getitem_axis(key, axis=axis) - - # if the dim was reduced, then pass a lower-dim the next time - if retval.ndim < self.ndim: - # TODO: this is never reached in tests; can we confirm that - # it is impossible? - axis -= 1 - - # try to get for the next axis - axis += 1 - - return retval + return self._getitem_tuple_same_dim(tup) def _get_list_axis(self, key, axis: int): """ From 8a7fbbeb8e3a88f8e355093eb1b68f361e65b6aa Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 12 Feb 2020 13:47:26 -0800 Subject: [PATCH 014/388] TST: parametrize generic/internals tests (#31900) --- pandas/tests/generic/test_frame.py | 94 ++++---- pandas/tests/generic/test_generic.py | 72 +++---- pandas/tests/generic/test_series.py | 45 ++-- pandas/tests/internals/test_internals.py | 260 ++++++++++------------- 4 files changed, 203 insertions(+), 268 deletions(-) diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py index d8f4257566f84..dca65152e82db 100644 --- a/pandas/tests/generic/test_frame.py +++ b/pandas/tests/generic/test_frame.py @@ -32,19 +32,20 @@ def test_rename_mi(self): ) df.rename(str.lower) - def test_set_axis_name(self): + @pytest.mark.parametrize("func", ["_set_axis_name", "rename_axis"]) + def test_set_axis_name(self, func): df = pd.DataFrame([[1, 2], [3, 4]]) - funcs = ["_set_axis_name", "rename_axis"] - for func in funcs: - result = methodcaller(func, "foo")(df) - assert df.index.name is None - assert result.index.name == "foo" - result = methodcaller(func, "cols", axis=1)(df) - assert df.columns.name is None - assert result.columns.name == "cols" + result = methodcaller(func, "foo")(df) + assert df.index.name is None + assert result.index.name == "foo" - def test_set_axis_name_mi(self): + result = methodcaller(func, "cols", axis=1)(df) + assert df.columns.name is None + assert result.columns.name == "cols" + + @pytest.mark.parametrize("func", ["_set_axis_name", "rename_axis"]) + def test_set_axis_name_mi(self, func): df = DataFrame( np.empty((3, 3)), index=MultiIndex.from_tuples([("A", x) for x in list("aBc")]), @@ -52,15 +53,14 @@ def test_set_axis_name_mi(self): ) level_names = ["L1", "L2"] - funcs = ["_set_axis_name", "rename_axis"] - for func in funcs: - result = methodcaller(func, level_names)(df) - assert result.index.names == level_names - assert result.columns.names == [None, None] - result = methodcaller(func, level_names, axis=1)(df) - assert result.columns.names == ["L1", "L2"] - assert result.index.names == [None, None] + result = methodcaller(func, level_names)(df) + assert result.index.names == level_names + assert result.columns.names == [None, None] + + result = methodcaller(func, level_names, axis=1)(df) + assert 
result.columns.names == ["L1", "L2"] + assert result.index.names == [None, None] def test_nonzero_single_element(self): @@ -185,36 +185,35 @@ def test_deepcopy_empty(self): # formerly in Generic but only test DataFrame class TestDataFrame2: - def test_validate_bool_args(self): + @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0]) + def test_validate_bool_args(self, value): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - invalid_values = [1, "True", [1, 2, 3], 5.0] - for value in invalid_values: - with pytest.raises(ValueError): - super(DataFrame, df).rename_axis( - mapper={"a": "x", "b": "y"}, axis=1, inplace=value - ) + with pytest.raises(ValueError): + super(DataFrame, df).rename_axis( + mapper={"a": "x", "b": "y"}, axis=1, inplace=value + ) - with pytest.raises(ValueError): - super(DataFrame, df).drop("a", axis=1, inplace=value) + with pytest.raises(ValueError): + super(DataFrame, df).drop("a", axis=1, inplace=value) - with pytest.raises(ValueError): - super(DataFrame, df)._consolidate(inplace=value) + with pytest.raises(ValueError): + super(DataFrame, df)._consolidate(inplace=value) - with pytest.raises(ValueError): - super(DataFrame, df).fillna(value=0, inplace=value) + with pytest.raises(ValueError): + super(DataFrame, df).fillna(value=0, inplace=value) - with pytest.raises(ValueError): - super(DataFrame, df).replace(to_replace=1, value=7, inplace=value) + with pytest.raises(ValueError): + super(DataFrame, df).replace(to_replace=1, value=7, inplace=value) - with pytest.raises(ValueError): - super(DataFrame, df).interpolate(inplace=value) + with pytest.raises(ValueError): + super(DataFrame, df).interpolate(inplace=value) - with pytest.raises(ValueError): - super(DataFrame, df)._where(cond=df.a > 2, inplace=value) + with pytest.raises(ValueError): + super(DataFrame, df)._where(cond=df.a > 2, inplace=value) - with pytest.raises(ValueError): - super(DataFrame, df).mask(cond=df.a > 2, inplace=value) + with pytest.raises(ValueError): + super(DataFrame, df).mask(cond=df.a > 2, inplace=value) def test_unexpected_keyword(self): # GH8597 @@ -243,23 +242,10 @@ class TestToXArray: and LooseVersion(xarray.__version__) < LooseVersion("0.10.0"), reason="xarray >= 0.10.0 required", ) - @pytest.mark.parametrize( - "index", - [ - "FloatIndex", - "IntIndex", - "StringIndex", - "UnicodeIndex", - "DateIndex", - "PeriodIndex", - "CategoricalIndex", - "TimedeltaIndex", - ], - ) + @pytest.mark.parametrize("index", tm.all_index_generator(3)) def test_to_xarray_index_types(self, index): from xarray import Dataset - index = getattr(tm, f"make{index}") df = DataFrame( { "a": list("abc"), @@ -273,7 +259,7 @@ def test_to_xarray_index_types(self, index): } ) - df.index = index(3) + df.index = index df.index.name = "foo" df.columns.name = "bar" result = df.to_xarray() diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 02d803795e79c..8e54de771a3e4 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -257,39 +257,31 @@ def test_metadata_propagation(self): self.check_metadata(v1 & v2) self.check_metadata(v1 | v2) - def test_head_tail(self): + @pytest.mark.parametrize("index", tm.all_index_generator(10)) + def test_head_tail(self, index): # GH5370 o = self._construct(shape=10) - # check all index types - for index in [ - tm.makeFloatIndex, - tm.makeIntIndex, - tm.makeStringIndex, - tm.makeUnicodeIndex, - tm.makeDateIndex, - tm.makePeriodIndex, - ]: - axis = o._get_axis_name(0) - setattr(o, axis, index(len(getattr(o, 
axis)))) + axis = o._get_axis_name(0) + setattr(o, axis, index) - o.head() + o.head() - self._compare(o.head(), o.iloc[:5]) - self._compare(o.tail(), o.iloc[-5:]) + self._compare(o.head(), o.iloc[:5]) + self._compare(o.tail(), o.iloc[-5:]) - # 0-len - self._compare(o.head(0), o.iloc[0:0]) - self._compare(o.tail(0), o.iloc[0:0]) + # 0-len + self._compare(o.head(0), o.iloc[0:0]) + self._compare(o.tail(0), o.iloc[0:0]) - # bounded - self._compare(o.head(len(o) + 1), o) - self._compare(o.tail(len(o) + 1), o) + # bounded + self._compare(o.head(len(o) + 1), o) + self._compare(o.tail(len(o) + 1), o) - # neg index - self._compare(o.head(-3), o.head(7)) - self._compare(o.tail(-3), o.tail(7)) + # neg index + self._compare(o.head(-3), o.head(7)) + self._compare(o.tail(-3), o.tail(7)) def test_sample(self): # Fixes issue: 2419 @@ -468,16 +460,16 @@ def test_stat_unexpected_keyword(self): with pytest.raises(TypeError, match=errmsg): obj.any(epic=starwars) # logical_function - def test_api_compat(self): + @pytest.mark.parametrize("func", ["sum", "cumsum", "any", "var"]) + def test_api_compat(self, func): # GH 12021 # compat for __name__, __qualname__ obj = self._construct(5) - for func in ["sum", "cumsum", "any", "var"]: - f = getattr(obj, func) - assert f.__name__ == func - assert f.__qualname__.endswith(func) + f = getattr(obj, func) + assert f.__name__ == func + assert f.__qualname__.endswith(func) def test_stat_non_defaults_args(self): obj = self._construct(5) @@ -510,19 +502,17 @@ def test_truncate_out_of_bounds(self): self._compare(big.truncate(before=0, after=3e6), big) self._compare(big.truncate(before=-1, after=2e6), big) - def test_copy_and_deepcopy(self): + @pytest.mark.parametrize( + "func", + [copy, deepcopy, lambda x: x.copy(deep=False), lambda x: x.copy(deep=True)], + ) + @pytest.mark.parametrize("shape", [0, 1, 2]) + def test_copy_and_deepcopy(self, shape, func): # GH 15444 - for shape in [0, 1, 2]: - obj = self._construct(shape) - for func in [ - copy, - deepcopy, - lambda x: x.copy(deep=False), - lambda x: x.copy(deep=True), - ]: - obj_copy = func(obj) - assert obj_copy is not obj - self._compare(obj_copy, obj) + obj = self._construct(shape) + obj_copy = func(obj) + assert obj_copy is not obj + self._compare(obj_copy, obj) @pytest.mark.parametrize( "periods,fill_method,limit,exp", diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py index ce0daf8522687..5aafd83da78fd 100644 --- a/pandas/tests/generic/test_series.py +++ b/pandas/tests/generic/test_series.py @@ -38,29 +38,29 @@ def test_rename_mi(self): ) s.rename(str.lower) - def test_set_axis_name(self): + @pytest.mark.parametrize("func", ["rename_axis", "_set_axis_name"]) + def test_set_axis_name(self, func): s = Series([1, 2, 3], index=["a", "b", "c"]) - funcs = ["rename_axis", "_set_axis_name"] name = "foo" - for func in funcs: - result = methodcaller(func, name)(s) - assert s.index.name is None - assert result.index.name == name - def test_set_axis_name_mi(self): + result = methodcaller(func, name)(s) + assert s.index.name is None + assert result.index.name == name + + @pytest.mark.parametrize("func", ["rename_axis", "_set_axis_name"]) + def test_set_axis_name_mi(self, func): s = Series( [11, 21, 31], index=MultiIndex.from_tuples( [("A", x) for x in ["a", "B", "c"]], names=["l1", "l2"] ), ) - funcs = ["rename_axis", "_set_axis_name"] - for func in funcs: - result = methodcaller(func, ["L1", "L2"])(s) - assert s.index.name is None - assert s.index.names == ["l1", "l2"] - assert result.index.name is 
None - assert result.index.names, ["L1", "L2"] + + result = methodcaller(func, ["L1", "L2"])(s) + assert s.index.name is None + assert s.index.names == ["l1", "l2"] + assert result.index.name is None + assert result.index.names, ["L1", "L2"] def test_set_axis_name_raises(self): s = pd.Series([1]) @@ -230,24 +230,11 @@ class TestToXArray: and LooseVersion(xarray.__version__) < LooseVersion("0.10.0"), reason="xarray >= 0.10.0 required", ) - @pytest.mark.parametrize( - "index", - [ - "FloatIndex", - "IntIndex", - "StringIndex", - "UnicodeIndex", - "DateIndex", - "PeriodIndex", - "TimedeltaIndex", - "CategoricalIndex", - ], - ) + @pytest.mark.parametrize("index", tm.all_index_generator(6)) def test_to_xarray_index_types(self, index): from xarray import DataArray - index = getattr(tm, f"make{index}") - s = Series(range(6), index=index(6)) + s = Series(range(6), index=index) s.index.name = "foo" result = s.to_xarray() repr(result) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index aa966caa63238..fe161a0da791a 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -376,9 +376,6 @@ def test_pickle(self, mgr): mgr2 = tm.round_trip_pickle(mgr) tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) - # share ref_items - # assert mgr2.blocks[0].ref_items is mgr2.blocks[1].ref_items - # GH2431 assert hasattr(mgr2, "_is_consolidated") assert hasattr(mgr2, "_known_consolidated") @@ -789,40 +786,39 @@ def test_equals(self): bm2 = BlockManager(bm1.blocks[::-1], bm1.axes) assert bm1.equals(bm2) - def test_equals_block_order_different_dtypes(self): - # GH 9330 - - mgr_strings = [ + @pytest.mark.parametrize( + "mgr_string", + [ "a:i8;b:f8", # basic case "a:i8;b:f8;c:c8;d:b", # many types "a:i8;e:dt;f:td;g:string", # more types "a:i8;b:category;c:category2;d:category2", # categories "c:sparse;d:sparse_na;b:f8", # sparse - ] - - for mgr_string in mgr_strings: - bm = create_mgr(mgr_string) - block_perms = itertools.permutations(bm.blocks) - for bm_perm in block_perms: - bm_this = BlockManager(bm_perm, bm.axes) - assert bm.equals(bm_this) - assert bm_this.equals(bm) + ], + ) + def test_equals_block_order_different_dtypes(self, mgr_string): + # GH 9330 + bm = create_mgr(mgr_string) + block_perms = itertools.permutations(bm.blocks) + for bm_perm in block_perms: + bm_this = BlockManager(bm_perm, bm.axes) + assert bm.equals(bm_this) + assert bm_this.equals(bm) def test_single_mgr_ctor(self): mgr = create_single_mgr("f8", num_rows=5) assert mgr.as_array().tolist() == [0.0, 1.0, 2.0, 3.0, 4.0] - def test_validate_bool_args(self): - invalid_values = [1, "True", [1, 2, 3], 5.0] + @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0]) + def test_validate_bool_args(self, value): bm1 = create_mgr("a,b,c: i8-1; d,e,f: i8-2") - for value in invalid_values: - msg = ( - 'For argument "inplace" expected type bool, ' - f"received type {type(value).__name__}." - ) - with pytest.raises(ValueError, match=msg): - bm1.replace_list([1], [2], inplace=value) + msg = ( + 'For argument "inplace" expected type bool, ' + f"received type {type(value).__name__}." 
+ ) + with pytest.raises(ValueError, match=msg): + bm1.replace_list([1], [2], inplace=value) class TestIndexing: @@ -851,7 +847,8 @@ class TestIndexing: # MANAGERS = [MANAGERS[6]] - def test_get_slice(self): + @pytest.mark.parametrize("mgr", MANAGERS) + def test_get_slice(self, mgr): def assert_slice_ok(mgr, axis, slobj): mat = mgr.as_array() @@ -870,35 +867,33 @@ def assert_slice_ok(mgr, axis, slobj): ) tm.assert_index_equal(mgr.axes[axis][slobj], sliced.axes[axis]) - for mgr in self.MANAGERS: - for ax in range(mgr.ndim): - # slice - assert_slice_ok(mgr, ax, slice(None)) - assert_slice_ok(mgr, ax, slice(3)) - assert_slice_ok(mgr, ax, slice(100)) - assert_slice_ok(mgr, ax, slice(1, 4)) - assert_slice_ok(mgr, ax, slice(3, 0, -2)) - - # boolean mask - assert_slice_ok(mgr, ax, np.array([], dtype=np.bool_)) - assert_slice_ok(mgr, ax, np.ones(mgr.shape[ax], dtype=np.bool_)) - assert_slice_ok(mgr, ax, np.zeros(mgr.shape[ax], dtype=np.bool_)) - - if mgr.shape[ax] >= 3: - assert_slice_ok(mgr, ax, np.arange(mgr.shape[ax]) % 3 == 0) - assert_slice_ok( - mgr, ax, np.array([True, True, False], dtype=np.bool_) - ) - - # fancy indexer - assert_slice_ok(mgr, ax, []) - assert_slice_ok(mgr, ax, list(range(mgr.shape[ax]))) - - if mgr.shape[ax] >= 3: - assert_slice_ok(mgr, ax, [0, 1, 2]) - assert_slice_ok(mgr, ax, [-1, -2, -3]) - - def test_take(self): + for ax in range(mgr.ndim): + # slice + assert_slice_ok(mgr, ax, slice(None)) + assert_slice_ok(mgr, ax, slice(3)) + assert_slice_ok(mgr, ax, slice(100)) + assert_slice_ok(mgr, ax, slice(1, 4)) + assert_slice_ok(mgr, ax, slice(3, 0, -2)) + + # boolean mask + assert_slice_ok(mgr, ax, np.array([], dtype=np.bool_)) + assert_slice_ok(mgr, ax, np.ones(mgr.shape[ax], dtype=np.bool_)) + assert_slice_ok(mgr, ax, np.zeros(mgr.shape[ax], dtype=np.bool_)) + + if mgr.shape[ax] >= 3: + assert_slice_ok(mgr, ax, np.arange(mgr.shape[ax]) % 3 == 0) + assert_slice_ok(mgr, ax, np.array([True, True, False], dtype=np.bool_)) + + # fancy indexer + assert_slice_ok(mgr, ax, []) + assert_slice_ok(mgr, ax, list(range(mgr.shape[ax]))) + + if mgr.shape[ax] >= 3: + assert_slice_ok(mgr, ax, [0, 1, 2]) + assert_slice_ok(mgr, ax, [-1, -2, -3]) + + @pytest.mark.parametrize("mgr", MANAGERS) + def test_take(self, mgr): def assert_take_ok(mgr, axis, indexer): mat = mgr.as_array() taken = mgr.take(indexer, axis) @@ -907,18 +902,19 @@ def assert_take_ok(mgr, axis, indexer): ) tm.assert_index_equal(mgr.axes[axis].take(indexer), taken.axes[axis]) - for mgr in self.MANAGERS: - for ax in range(mgr.ndim): - # take/fancy indexer - assert_take_ok(mgr, ax, indexer=[]) - assert_take_ok(mgr, ax, indexer=[0, 0, 0]) - assert_take_ok(mgr, ax, indexer=list(range(mgr.shape[ax]))) + for ax in range(mgr.ndim): + # take/fancy indexer + assert_take_ok(mgr, ax, indexer=[]) + assert_take_ok(mgr, ax, indexer=[0, 0, 0]) + assert_take_ok(mgr, ax, indexer=list(range(mgr.shape[ax]))) - if mgr.shape[ax] >= 3: - assert_take_ok(mgr, ax, indexer=[0, 1, 2]) - assert_take_ok(mgr, ax, indexer=[-1, -2, -3]) + if mgr.shape[ax] >= 3: + assert_take_ok(mgr, ax, indexer=[0, 1, 2]) + assert_take_ok(mgr, ax, indexer=[-1, -2, -3]) - def test_reindex_axis(self): + @pytest.mark.parametrize("mgr", MANAGERS) + @pytest.mark.parametrize("fill_value", [None, np.nan, 100.0]) + def test_reindex_axis(self, fill_value, mgr): def assert_reindex_axis_is_ok(mgr, axis, new_labels, fill_value): mat = mgr.as_array() indexer = mgr.axes[axis].get_indexer_for(new_labels) @@ -931,33 +927,27 @@ def assert_reindex_axis_is_ok(mgr, axis, new_labels, fill_value): 
) tm.assert_index_equal(reindexed.axes[axis], new_labels) - for mgr in self.MANAGERS: - for ax in range(mgr.ndim): - for fill_value in (None, np.nan, 100.0): - assert_reindex_axis_is_ok(mgr, ax, pd.Index([]), fill_value) - assert_reindex_axis_is_ok(mgr, ax, mgr.axes[ax], fill_value) - assert_reindex_axis_is_ok( - mgr, ax, mgr.axes[ax][[0, 0, 0]], fill_value - ) - assert_reindex_axis_is_ok( - mgr, ax, pd.Index(["foo", "bar", "baz"]), fill_value - ) - assert_reindex_axis_is_ok( - mgr, ax, pd.Index(["foo", mgr.axes[ax][0], "baz"]), fill_value - ) + for ax in range(mgr.ndim): + assert_reindex_axis_is_ok(mgr, ax, pd.Index([]), fill_value) + assert_reindex_axis_is_ok(mgr, ax, mgr.axes[ax], fill_value) + assert_reindex_axis_is_ok(mgr, ax, mgr.axes[ax][[0, 0, 0]], fill_value) + assert_reindex_axis_is_ok( + mgr, ax, pd.Index(["foo", "bar", "baz"]), fill_value + ) + assert_reindex_axis_is_ok( + mgr, ax, pd.Index(["foo", mgr.axes[ax][0], "baz"]), fill_value + ) + + if mgr.shape[ax] >= 3: + assert_reindex_axis_is_ok(mgr, ax, mgr.axes[ax][:-3], fill_value) + assert_reindex_axis_is_ok(mgr, ax, mgr.axes[ax][-3::-1], fill_value) + assert_reindex_axis_is_ok( + mgr, ax, mgr.axes[ax][[0, 1, 2, 0, 1, 2]], fill_value + ) - if mgr.shape[ax] >= 3: - assert_reindex_axis_is_ok( - mgr, ax, mgr.axes[ax][:-3], fill_value - ) - assert_reindex_axis_is_ok( - mgr, ax, mgr.axes[ax][-3::-1], fill_value - ) - assert_reindex_axis_is_ok( - mgr, ax, mgr.axes[ax][[0, 1, 2, 0, 1, 2]], fill_value - ) - - def test_reindex_indexer(self): + @pytest.mark.parametrize("mgr", MANAGERS) + @pytest.mark.parametrize("fill_value", [None, np.nan, 100.0]) + def test_reindex_indexer(self, fill_value, mgr): def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, fill_value): mat = mgr.as_array() reindexed_mat = algos.take_nd(mat, indexer, axis, fill_value=fill_value) @@ -969,60 +959,42 @@ def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, fill_value): ) tm.assert_index_equal(reindexed.axes[axis], new_labels) - for mgr in self.MANAGERS: - for ax in range(mgr.ndim): - for fill_value in (None, np.nan, 100.0): - assert_reindex_indexer_is_ok(mgr, ax, pd.Index([]), [], fill_value) - assert_reindex_indexer_is_ok( - mgr, ax, mgr.axes[ax], np.arange(mgr.shape[ax]), fill_value - ) - assert_reindex_indexer_is_ok( - mgr, - ax, - pd.Index(["foo"] * mgr.shape[ax]), - np.arange(mgr.shape[ax]), - fill_value, - ) - assert_reindex_indexer_is_ok( - mgr, - ax, - mgr.axes[ax][::-1], - np.arange(mgr.shape[ax]), - fill_value, - ) - assert_reindex_indexer_is_ok( - mgr, - ax, - mgr.axes[ax], - np.arange(mgr.shape[ax])[::-1], - fill_value, - ) - assert_reindex_indexer_is_ok( - mgr, ax, pd.Index(["foo", "bar", "baz"]), [0, 0, 0], fill_value - ) - assert_reindex_indexer_is_ok( - mgr, - ax, - pd.Index(["foo", "bar", "baz"]), - [-1, 0, -1], - fill_value, - ) - assert_reindex_indexer_is_ok( - mgr, - ax, - pd.Index(["foo", mgr.axes[ax][0], "baz"]), - [-1, -1, -1], - fill_value, - ) + for ax in range(mgr.ndim): + assert_reindex_indexer_is_ok(mgr, ax, pd.Index([]), [], fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, mgr.axes[ax], np.arange(mgr.shape[ax]), fill_value + ) + assert_reindex_indexer_is_ok( + mgr, + ax, + pd.Index(["foo"] * mgr.shape[ax]), + np.arange(mgr.shape[ax]), + fill_value, + ) + assert_reindex_indexer_is_ok( + mgr, ax, mgr.axes[ax][::-1], np.arange(mgr.shape[ax]), fill_value, + ) + assert_reindex_indexer_is_ok( + mgr, ax, mgr.axes[ax], np.arange(mgr.shape[ax])[::-1], fill_value, + ) + assert_reindex_indexer_is_ok( + mgr, ax, 
pd.Index(["foo", "bar", "baz"]), [0, 0, 0], fill_value + ) + assert_reindex_indexer_is_ok( + mgr, ax, pd.Index(["foo", "bar", "baz"]), [-1, 0, -1], fill_value, + ) + assert_reindex_indexer_is_ok( + mgr, + ax, + pd.Index(["foo", mgr.axes[ax][0], "baz"]), + [-1, -1, -1], + fill_value, + ) - if mgr.shape[ax] >= 3: - assert_reindex_indexer_is_ok( - mgr, - ax, - pd.Index(["foo", "bar", "baz"]), - [0, 1, 2], - fill_value, - ) + if mgr.shape[ax] >= 3: + assert_reindex_indexer_is_ok( + mgr, ax, pd.Index(["foo", "bar", "baz"]), [0, 1, 2], fill_value, + ) # test_get_slice(slice_like, axis) # take(indexer, axis) From 50dad9cbe582644226f3d3a401abdc18c0fa975c Mon Sep 17 00:00:00 2001 From: 3vts <3vts@users.noreply.github.com> Date: Wed, 12 Feb 2020 23:54:38 -0600 Subject: [PATCH 015/388] CLN: 29547 replace old string formatting 2 (#31933) --- pandas/tests/frame/test_operators.py | 4 ++-- pandas/tests/frame/test_reshape.py | 4 +++- pandas/tests/frame/test_timeseries.py | 4 ++-- pandas/tests/indexes/datetimes/test_scalar_compat.py | 6 +++--- pandas/tests/indexes/datetimes/test_tools.py | 4 ++-- pandas/tests/indexes/interval/test_indexing.py | 12 ++---------- pandas/tests/indexes/interval/test_interval.py | 2 +- 7 files changed, 15 insertions(+), 21 deletions(-) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 162f3c114fa5d..df40c2e7e2a11 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -840,8 +840,8 @@ def test_inplace_ops_identity2(self, op): df["a"] = [True, False, True] df_copy = df.copy() - iop = "__i{}__".format(op) - op = "__{}__".format(op) + iop = f"__i{op}__" + op = f"__{op}__" # no id change and value is correct getattr(df, iop)(operand) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index b3af5a7b7317e..46a4a0a2af4ba 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -765,7 +765,9 @@ def test_unstack_unused_level(self, cols): tm.assert_frame_equal(result, expected) def test_unstack_nan_index(self): # GH7466 - cast = lambda val: "{0:1}".format("" if val != val else val) + def cast(val): + val_str = "" if val != val else val + return f"{val_str:1}" def verify(df): mk_list = lambda a: list(a) if isinstance(a, tuple) else [a] diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index e89f4ee07ea00..5e06b6402c34f 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -54,7 +54,7 @@ def test_frame_append_datetime64_col_other_units(self): ns_dtype = np.dtype("M8[ns]") for unit in units: - dtype = np.dtype("M8[{unit}]".format(unit=unit)) + dtype = np.dtype(f"M8[{unit}]") vals = np.arange(n, dtype=np.int64).view(dtype) df = DataFrame({"ints": np.arange(n)}, index=np.arange(n)) @@ -70,7 +70,7 @@ def test_frame_append_datetime64_col_other_units(self): df["dates"] = np.arange(n, dtype=np.int64).view(ns_dtype) for unit in units: - dtype = np.dtype("M8[{unit}]".format(unit=unit)) + dtype = np.dtype(f"M8[{unit}]") vals = np.arange(n, dtype=np.int64).view(dtype) tmp = df.copy() diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index 84eee2419f0b8..21ee8649172da 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -248,21 +248,21 @@ def test_round_int64(self, start, index_freq, periods, round_freq): result = 
dt.floor(round_freq) diff = dt.asi8 - result.asi8 mod = result.asi8 % unit - assert (mod == 0).all(), "floor not a {} multiple".format(round_freq) + assert (mod == 0).all(), f"floor not a {round_freq} multiple" assert (0 <= diff).all() and (diff < unit).all(), "floor error" # test ceil result = dt.ceil(round_freq) diff = result.asi8 - dt.asi8 mod = result.asi8 % unit - assert (mod == 0).all(), "ceil not a {} multiple".format(round_freq) + assert (mod == 0).all(), f"ceil not a {round_freq} multiple" assert (0 <= diff).all() and (diff < unit).all(), "ceil error" # test round result = dt.round(round_freq) diff = abs(result.asi8 - dt.asi8) mod = result.asi8 % unit - assert (mod == 0).all(), "round not a {} multiple".format(round_freq) + assert (mod == 0).all(), f"round not a {round_freq} multiple" assert (diff <= unit // 2).all(), "round error" if unit % 2 == 0: assert ( diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index df3a49fb7c292..13723f6455bff 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -199,7 +199,7 @@ def test_to_datetime_format_microsecond(self, cache): # these are locale dependent lang, _ = locale.getlocale() month_abbr = calendar.month_abbr[4] - val = "01-{}-2011 00:00:01.978".format(month_abbr) + val = f"01-{month_abbr}-2011 00:00:01.978" format = "%d-%b-%Y %H:%M:%S.%f" result = to_datetime(val, format=format, cache=cache) @@ -551,7 +551,7 @@ def test_to_datetime_dt64s(self, cache): ) @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_dt64s_out_of_bounds(self, cache, dt): - msg = "Out of bounds nanosecond timestamp: {}".format(dt) + msg = f"Out of bounds nanosecond timestamp: {dt}" with pytest.raises(OutOfBoundsDatetime, match=msg): pd.to_datetime(dt, errors="raise") with pytest.raises(OutOfBoundsDatetime, match=msg): diff --git a/pandas/tests/indexes/interval/test_indexing.py b/pandas/tests/indexes/interval/test_indexing.py index 87b72f702e2aa..0e5721bfd83fd 100644 --- a/pandas/tests/indexes/interval/test_indexing.py +++ b/pandas/tests/indexes/interval/test_indexing.py @@ -24,11 +24,7 @@ def test_get_loc_interval(self, closed, side): for bound in [[0, 1], [1, 2], [2, 3], [3, 4], [0, 2], [2.5, 3], [-1, 4]]: # if get_loc is supplied an interval, it should only search # for exact matches, not overlaps or covers, else KeyError. 
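            # (Illustration, assuming the standard IntervalIndex API: with
            # idx = IntervalIndex.from_breaks(range(5), closed="right"),
            # idx.get_loc(Interval(0, 1)) returns 0, while
            # idx.get_loc(Interval(0, 2)) raises KeyError even though that
            # interval covers two elements.)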
- msg = re.escape( - "Interval({bound[0]}, {bound[1]}, closed='{side}')".format( - bound=bound, side=side - ) - ) + msg = re.escape(f"Interval({bound[0]}, {bound[1]}, closed='{side}')") if closed == side: if bound == [0, 1]: assert idx.get_loc(Interval(0, 1, closed=side)) == 0 @@ -86,11 +82,7 @@ def test_get_loc_length_one_interval(self, left, right, closed, other_closed): else: with pytest.raises( KeyError, - match=re.escape( - "Interval({left}, {right}, closed='{other_closed}')".format( - left=left, right=right, other_closed=other_closed - ) - ), + match=re.escape(f"Interval({left}, {right}, closed='{other_closed}')"), ): index.get_loc(interval) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index d010060880703..c2b209c810af9 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -845,7 +845,7 @@ def test_set_closed(self, name, closed, new_closed): def test_set_closed_errors(self, bad_closed): # GH 21670 index = interval_range(0, 5) - msg = "invalid option for 'closed': {closed}".format(closed=bad_closed) + msg = f"invalid option for 'closed': {bad_closed}" with pytest.raises(ValueError, match=msg): index.set_closed(bad_closed) From 56cc7f4f3f8b864f0ba50e5c70746bd4815fd3e7 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Thu, 13 Feb 2020 02:16:33 -0600 Subject: [PATCH 016/388] BUG: Handle NA in assert_numpy_array_equal (#31910) --- pandas/_libs/lib.pyx | 2 ++ .../util/test_assert_numpy_array_equal.py | 36 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index d2f0b2ffbaeec..1990ef66a6bf1 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -571,6 +571,8 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool: if PyArray_Check(x) and PyArray_Check(y): if not array_equivalent_object(x, y): return False + elif (x is C_NA) ^ (y is C_NA): + return False elif not (PyObject_RichCompareBool(x, y, Py_EQ) or (x is None or is_nan(x)) and (y is None or is_nan(y))): return False diff --git a/pandas/tests/util/test_assert_numpy_array_equal.py b/pandas/tests/util/test_assert_numpy_array_equal.py index c8ae9ebdd8651..d29ddedd2fdd6 100644 --- a/pandas/tests/util/test_assert_numpy_array_equal.py +++ b/pandas/tests/util/test_assert_numpy_array_equal.py @@ -1,6 +1,7 @@ import numpy as np import pytest +import pandas as pd from pandas import Timestamp import pandas._testing as tm @@ -175,3 +176,38 @@ def test_numpy_array_equal_copy_flag(other_type, check_same): tm.assert_numpy_array_equal(a, other, check_same=check_same) else: tm.assert_numpy_array_equal(a, other, check_same=check_same) + + +def test_numpy_array_equal_contains_na(): + # https://github.com/pandas-dev/pandas/issues/31881 + a = np.array([True, False]) + b = np.array([True, pd.NA], dtype=object) + + msg = """numpy array are different + +numpy array values are different \\(50.0 %\\) +\\[left\\]: \\[True, False\\] +\\[right\\]: \\[True, \\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal(a, b) + + +def test_numpy_array_equal_identical_na(nulls_fixture): + a = np.array([nulls_fixture], dtype=object) + + tm.assert_numpy_array_equal(a, a) + + +def test_numpy_array_equal_different_na(): + a = np.array([np.nan], dtype=object) + b = np.array([pd.NA], dtype=object) + + msg = """numpy array are different + +numpy array values are different \\(100.0 %\\) 
+\\[left\\]: \\[nan\\] +\\[right\\]: \\[\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal(a, b) From 4ac1e5ff015430795ca89f01f9a2ed38038f9563 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 13 Feb 2020 04:42:54 -0800 Subject: [PATCH 017/388] CLN: assorted cleanups (#31938) --- pandas/core/arrays/datetimes.py | 3 ++- pandas/core/generic.py | 1 + pandas/core/internals/managers.py | 1 - pandas/core/ops/array_ops.py | 4 ++-- pandas/core/series.py | 2 +- pandas/tests/indexing/test_chaining_and_caching.py | 11 ++++------- 6 files changed, 10 insertions(+), 12 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 5888600d2fa8e..a75536e46e60d 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -283,7 +283,8 @@ def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False): @classmethod def _simple_new(cls, values, freq=None, dtype=_NS_DTYPE): assert isinstance(values, np.ndarray) - if values.dtype == "i8": + if values.dtype != _NS_DTYPE: + assert values.dtype == "i8" values = values.view(_NS_DTYPE) result = object.__new__(cls) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 934c4c6e92bbe..adfb553d40ff0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3504,6 +3504,7 @@ def _slice(self: FrameOrSeries, slobj: slice, axis=0) -> FrameOrSeries: Slicing with this method is *always* positional. """ + assert isinstance(slobj, slice), type(slobj) axis = self._get_block_manager_axis(axis) result = self._constructor(self._data.get_slice(slobj, axis=axis)) result = result.__finalize__(self) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3dc7dd7d81530..37a4b43648bb1 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1316,7 +1316,6 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): return blocks def _make_na_block(self, placement, fill_value=None): - # TODO: infer dtypes other than float64 from fill_value if fill_value is None: fill_value = np.nan diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 5d53856729d0c..37a4a6eddaebe 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -43,9 +43,9 @@ def comp_method_OBJECT_ARRAY(op, x, y): if isinstance(y, list): y = construct_1d_object_array_from_listlike(y) - # TODO: Should the checks below be ABCIndexClass? if isinstance(y, (np.ndarray, ABCSeries, ABCIndex)): - # TODO: should this be ABCIndexClass?? + # Note: these checks can be for ABCIndex and not ABCIndexClass + # because that is the only object-dtype class. 
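        # (e.g. a pd.Index([1, 2, 3]) operand is cast to object below, so the
        # element-wise comparison sees Python objects on both sides; a sketch
        # of the intent)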
if not is_object_dtype(y.dtype): y = y.astype(np.object_) diff --git a/pandas/core/series.py b/pandas/core/series.py index 24e794014a15f..8577a7fb904dc 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4378,7 +4378,7 @@ def between(self, left, right, inclusive=True) -> "Series": # Convert to types that support pd.NA def _convert_dtypes( - self: ABCSeries, + self, infer_objects: bool = True, convert_string: bool = True, convert_integer: bool = True, diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index e845487ffca9a..17722e949df1e 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -346,20 +346,17 @@ def test_chained_getitem_with_lists(self): # GH6394 # Regression in chained getitem indexing with embedded list-like from # 0.12 - def check(result, expected): - tm.assert_numpy_array_equal(result, expected) - assert isinstance(result, np.ndarray) df = DataFrame({"A": 5 * [np.zeros(3)], "B": 5 * [np.ones(3)]}) expected = df["A"].iloc[2] result = df.loc[2, "A"] - check(result, expected) + tm.assert_numpy_array_equal(result, expected) result2 = df.iloc[2]["A"] - check(result2, expected) + tm.assert_numpy_array_equal(result2, expected) result3 = df["A"].loc[2] - check(result3, expected) + tm.assert_numpy_array_equal(result3, expected) result4 = df["A"].iloc[2] - check(result4, expected) + tm.assert_numpy_array_equal(result4, expected) def test_cache_updating(self): # GH 4939, make sure to update the cache on setitem From 95b0e142619c9a0f335c24d71f4ac976891a6021 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 13 Feb 2020 14:39:46 +0100 Subject: [PATCH 018/388] TST: expand tests for ExtensionArray setitem with nullable arrays (#31741) --- pandas/tests/extension/base/setitem.py | 99 ++++++++++++++++++++++---- pandas/tests/extension/test_numpy.py | 46 ++++++++++++ 2 files changed, 133 insertions(+), 12 deletions(-) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 590bcd586900a..af70799c0236e 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -4,7 +4,7 @@ import pytest import pandas as pd -from pandas.core.arrays.numpy_ import PandasDtype +import pandas._testing as tm from .base import BaseExtensionTests @@ -93,6 +93,92 @@ def test_setitem_iloc_scalar_multiple_homogoneous(self, data): df.iloc[10, 1] = data[1] assert df.loc[10, "B"] == data[1] + @pytest.mark.parametrize( + "mask", + [ + np.array([True, True, True, False, False]), + pd.array([True, True, True, False, False], dtype="boolean"), + ], + ids=["numpy-array", "boolean-array"], + ) + def test_setitem_mask(self, data, mask, box_in_series): + arr = data[:5].copy() + expected = arr.take([0, 0, 0, 3, 4]) + if box_in_series: + arr = pd.Series(arr) + expected = pd.Series(expected) + arr[mask] = data[0] + self.assert_equal(expected, arr) + + def test_setitem_mask_raises(self, data, box_in_series): + # wrong length + mask = np.array([True, False]) + + if box_in_series: + data = pd.Series(data) + + with pytest.raises(IndexError, match="wrong length"): + data[mask] = data[0] + + mask = pd.array(mask, dtype="boolean") + with pytest.raises(IndexError, match="wrong length"): + data[mask] = data[0] + + def test_setitem_mask_boolean_array_raises(self, data, box_in_series): + # missing values in mask + mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") + mask[:2] = pd.NA + + 
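        # a boolean mask holding pd.NA is ambiguous (NA is neither True nor
        # False), so __setitem__ is expected to refuse it rather than guess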
if box_in_series: + data = pd.Series(data) + + msg = ( + "Cannot mask with a boolean indexer containing NA values|" + "cannot mask with array containing NA / NaN values" + ) + with pytest.raises(ValueError, match=msg): + data[mask] = data[0] + + @pytest.mark.parametrize( + "idx", + [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], + ids=["list", "integer-array", "numpy-array"], + ) + def test_setitem_integer_array(self, data, idx, box_in_series): + arr = data[:5].copy() + expected = data.take([0, 0, 0, 3, 4]) + + if box_in_series: + arr = pd.Series(arr) + expected = pd.Series(expected) + + arr[idx] = arr[0] + self.assert_equal(arr, expected) + + @pytest.mark.parametrize( + "idx, box_in_series", + [ + ([0, 1, 2, pd.NA], False), + pytest.param( + [0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948") + ), + (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), + (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), + ], + ids=["list-False", "list-True", "integer-array-False", "integer-array-True"], + ) + def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series): + arr = data.copy() + + # TODO(xfail) this raises KeyError about labels not found (it tries label-based) + # for list of labels with Series + if box_in_series: + arr = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))]) + + msg = "Cannot index with an integer indexer containing NA values" + with pytest.raises(ValueError, match=msg): + arr[idx] = arr[0] + @pytest.mark.parametrize("as_callable", [True, False]) @pytest.mark.parametrize("setter", ["loc", None]) def test_setitem_mask_aligned(self, data, as_callable, setter): @@ -219,14 +305,3 @@ def test_setitem_preserves_views(self, data): data[0] = data[1] assert view1[0] == data[1] assert view2[0] == data[1] - - def test_setitem_nullable_mask(self, data): - # GH 31446 - # TODO: there is some issue with PandasArray, therefore, - # TODO: skip the setitem test for now, and fix it later - if data.dtype != PandasDtype("object"): - arr = data[:5] - expected = data.take([0, 0, 0, 3, 4]) - mask = pd.array([True, True, True, False, False]) - arr[mask] = data[0] - self.assert_extension_array_equal(expected, arr) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 76573242a2506..80a093530a8cd 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -396,6 +396,52 @@ def test_setitem_scalar_key_sequence_raise(self, data): # Failed: DID NOT RAISE super().test_setitem_scalar_key_sequence_raise(data) + # TODO: there is some issue with PandasArray, therefore, + # skip the setitem test for now, and fix it later (GH 31446) + + @skip_nested + @pytest.mark.parametrize( + "mask", + [ + np.array([True, True, True, False, False]), + pd.array([True, True, True, False, False], dtype="boolean"), + ], + ids=["numpy-array", "boolean-array"], + ) + def test_setitem_mask(self, data, mask, box_in_series): + super().test_setitem_mask(data, mask, box_in_series) + + @skip_nested + def test_setitem_mask_raises(self, data, box_in_series): + super().test_setitem_mask_raises(data, box_in_series) + + @skip_nested + def test_setitem_mask_boolean_array_raises(self, data, box_in_series): + super().test_setitem_mask_boolean_array_raises(data, box_in_series) + + @skip_nested + @pytest.mark.parametrize( + "idx", + [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], + ids=["list", "integer-array", "numpy-array"], + ) + def test_setitem_integer_array(self, data, idx, 
box_in_series): + super().test_setitem_integer_array(data, idx, box_in_series) + + @skip_nested + @pytest.mark.parametrize( + "idx, box_in_series", + [ + ([0, 1, 2, pd.NA], False), + pytest.param([0, 1, 2, pd.NA], True, marks=pytest.mark.xfail), + (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), + (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), + ], + ids=["list-False", "list-True", "integer-array-False", "integer-array-True"], + ) + def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series): + super().test_setitem_integer_with_missing_raises(data, idx, box_in_series) + @skip_nested def test_setitem_slice(self, data, box_in_series): super().test_setitem_slice(data, box_in_series) From e8eb49de60e8a9e301dd7a2dd82a54e1f3476801 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 13 Feb 2020 19:35:29 +0000 Subject: [PATCH 019/388] remove blocking return (#31960) --- pandas/tests/indexes/multi/test_copy.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_copy.py b/pandas/tests/indexes/multi/test_copy.py index 1acc65aef8b8a..67b815ecba3b8 100644 --- a/pandas/tests/indexes/multi/test_copy.py +++ b/pandas/tests/indexes/multi/test_copy.py @@ -80,7 +80,6 @@ def test_copy_method_kwargs(deep, kwarg, value): codes=[[0, 0, 0, 1], [0, 0, 1, 1]], names=["first", "second"], ) - return idx_copy = idx.copy(**{kwarg: value, "deep": deep}) if kwarg == "names": assert getattr(idx_copy, kwarg) == value From bcfc608f61ac0ac719822ea303336c0d053d9d9b Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 13 Feb 2020 20:06:56 +0000 Subject: [PATCH 020/388] D412: No blank lines allowed between a section header and its content (#31956) --- pandas/_config/config.py | 5 +-- pandas/_testing.py | 60 ++++++++++++++-------------- pandas/core/accessor.py | 1 - pandas/core/arrays/boolean.py | 2 - pandas/core/arrays/categorical.py | 1 - pandas/core/arrays/sparse/dtype.py | 1 - pandas/core/computation/pytables.py | 1 - pandas/core/dtypes/concat.py | 2 - pandas/core/frame.py | 1 - pandas/core/generic.py | 9 ----- pandas/core/groupby/groupby.py | 2 - pandas/core/groupby/grouper.py | 1 - pandas/core/indexes/base.py | 4 -- pandas/core/indexes/multi.py | 2 - pandas/core/indexes/period.py | 1 - pandas/core/indexes/timedeltas.py | 1 - pandas/core/indexing.py | 2 - pandas/core/resample.py | 1 - pandas/core/strings.py | 3 -- pandas/core/tools/timedeltas.py | 1 - pandas/core/window/ewm.py | 1 - pandas/core/window/expanding.py | 1 - pandas/core/window/rolling.py | 1 - pandas/io/formats/style.py | 2 - pandas/io/json/_json.py | 1 - pandas/io/json/_normalize.py | 2 - pandas/io/pytables.py | 2 - pandas/plotting/_core.py | 5 +-- pandas/plotting/_matplotlib/tools.py | 12 +++--- pandas/plotting/_misc.py | 1 + pandas/tests/extension/json/array.py | 4 +- 31 files changed, 44 insertions(+), 89 deletions(-) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index c283baeb9d412..2df940817498c 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -395,7 +395,6 @@ class option_context: Examples -------- - >>> with option_context('display.max_rows', 10, 'display.max_columns', 5): ... ... """ @@ -716,8 +715,8 @@ def config_prefix(prefix): Warning: This is not thread - safe, and won't work properly if you import the API functions into your module using the "from x import y" construct. 
- Example: - + Example + ------- import pandas._config.config as cf with cf.config_prefix("display.font"): cf.register_option("color", "red") diff --git a/pandas/_testing.py b/pandas/_testing.py index 46ed65c87e8dd..6029fbe59bbcd 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -1970,35 +1970,39 @@ def makeCustomDataframe( r_idx_type=None, ): """ - nrows, ncols - number of data rows/cols - c_idx_names, idx_names - False/True/list of strings, yields No names , - default names or uses the provided names for the levels of the - corresponding index. You can provide a single string when - c_idx_nlevels ==1. - c_idx_nlevels - number of levels in columns index. > 1 will yield MultiIndex - r_idx_nlevels - number of levels in rows index. > 1 will yield MultiIndex - data_gen_f - a function f(row,col) which return the data value - at that position, the default generator used yields values of the form - "RxCy" based on position. - c_ndupe_l, r_ndupe_l - list of integers, determines the number - of duplicates for each label at a given level of the corresponding - index. The default `None` value produces a multiplicity of 1 across - all levels, i.e. a unique index. Will accept a partial list of length - N < idx_nlevels, for just the first N levels. If ndupe doesn't divide - nrows/ncol, the last label might have lower multiplicity. - dtype - passed to the DataFrame constructor as is, in case you wish to - have more control in conjunction with a custom `data_gen_f` - r_idx_type, c_idx_type - "i"/"f"/"s"/"u"/"dt"/"td". - If idx_type is not None, `idx_nlevels` must be 1. - "i"/"f" creates an integer/float index, - "s"/"u" creates a string/unicode index - "dt" create a datetime index. - "td" create a timedelta index. - - if unspecified, string labels will be generated. + Create a DataFrame using supplied parameters. - Examples: + Parameters + ---------- + nrows, ncols - number of data rows/cols + c_idx_names, idx_names - False/True/list of strings, yields No names , + default names or uses the provided names for the levels of the + corresponding index. You can provide a single string when + c_idx_nlevels ==1. + c_idx_nlevels - number of levels in columns index. > 1 will yield MultiIndex + r_idx_nlevels - number of levels in rows index. > 1 will yield MultiIndex + data_gen_f - a function f(row,col) which return the data value + at that position, the default generator used yields values of the form + "RxCy" based on position. + c_ndupe_l, r_ndupe_l - list of integers, determines the number + of duplicates for each label at a given level of the corresponding + index. The default `None` value produces a multiplicity of 1 across + all levels, i.e. a unique index. Will accept a partial list of length + N < idx_nlevels, for just the first N levels. If ndupe doesn't divide + nrows/ncol, the last label might have lower multiplicity. + dtype - passed to the DataFrame constructor as is, in case you wish to + have more control in conjunction with a custom `data_gen_f` + r_idx_type, c_idx_type - "i"/"f"/"s"/"u"/"dt"/"td". + If idx_type is not None, `idx_nlevels` must be 1. + "i"/"f" creates an integer/float index, + "s"/"u" creates a string/unicode index + "dt" create a datetime index. + "td" create a timedelta index. + + if unspecified, string labels will be generated. 
+ Examples + -------- # 5 row, 3 columns, default names on both, single index on both axis >> makeCustomDataframe(5,3) @@ -2514,7 +2518,6 @@ class RNGContext: Examples -------- - with RNGContext(42): np.random.randn() """ @@ -2669,7 +2672,6 @@ def set_timezone(tz: str): Examples -------- - >>> from datetime import datetime >>> from dateutil.tz import tzlocal >>> tzlocal().tzname(datetime.now()) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 4e3ef0c52bbdd..fc40f1db1918a 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -233,7 +233,6 @@ def __init__(self, pandas_object): # noqa: E999 Examples -------- - In your library code:: import pandas as pd diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 590b40b0434e5..d93b5fbc83312 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -488,7 +488,6 @@ def any(self, skipna: bool = True, **kwargs): Examples -------- - The result indicates whether any element is True (and by default skips NAs): @@ -557,7 +556,6 @@ def all(self, skipna: bool = True, **kwargs): Examples -------- - The result indicates whether any element is True (and by default skips NAs): diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index b095288acca90..6c7c35e9b4763 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2388,7 +2388,6 @@ def isin(self, values): Examples -------- - >>> s = pd.Categorical(['lama', 'cow', 'lama', 'beetle', 'lama', ... 'hippo']) >>> s.isin(['cow', 'lama']) diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index 1ce735421e7d6..86869f50aab8e 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -336,7 +336,6 @@ def _subtype_with_str(self): Returns ------- - >>> SparseDtype(int, 1)._subtype_with_str dtype('int64') diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 19f151846a080..097c3c22aa6c3 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -501,7 +501,6 @@ class PyTablesExpr(expr.Expr): Examples -------- - 'index>=date' "columns=['A', 'D']" 'columns=A' diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index e53eb3b4d8e71..003c3505885bb 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -215,13 +215,11 @@ def union_categoricals( Notes ----- - To learn more about categories, see `link `__ Examples -------- - >>> from pandas.api.types import union_categoricals If you want to combine categoricals that do not necessarily have diff --git a/pandas/core/frame.py b/pandas/core/frame.py index da152b70abd2e..2f9c8286d2988 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -927,7 +927,6 @@ def iterrows(self) -> Iterable[Tuple[Optional[Hashable], Series]]: Notes ----- - 1. Because ``iterrows`` returns a Series for each row, it does **not** preserve dtypes across the rows (dtypes are preserved across columns for DataFrames). For example, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index adfb553d40ff0..0ea8da0da9c6d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -899,7 +899,6 @@ def rename( Examples -------- - >>> s = pd.Series([1, 2, 3]) >>> s 0 1 @@ -2208,7 +2207,6 @@ def to_json( Examples -------- - >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']], ... index=['row 1', 'row 2'], ... 
columns=['col 1', 'col 2']) @@ -2507,7 +2505,6 @@ def to_sql( Examples -------- - Create an in-memory SQLite database. >>> from sqlalchemy import create_engine @@ -4185,7 +4182,6 @@ def reindex(self: FrameOrSeries, *args, **kwargs) -> FrameOrSeries: Examples -------- - ``DataFrame.reindex`` supports two calling conventions * ``(index=index_labels, columns=column_labels, ...)`` @@ -5768,7 +5764,6 @@ def convert_dtypes( Notes ----- - By default, ``convert_dtypes`` will attempt to convert a Series (or each Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options ``convert_string``, ``convert_integer``, and ``convert_boolean``, it is @@ -7434,7 +7429,6 @@ def asfreq( Examples -------- - Start by creating a series with 4 one minute timestamps. >>> index = pd.date_range('1/1/2000', periods=4, freq='T') @@ -7713,7 +7707,6 @@ def resample( Examples -------- - Start by creating a series with 9 one minute timestamps. >>> index = pd.date_range('1/1/2000', periods=9, freq='T') @@ -8100,7 +8093,6 @@ def rank( Examples -------- - >>> df = pd.DataFrame(data={'Animal': ['cat', 'penguin', 'dog', ... 'spider', 'snake'], ... 'Number_legs': [4, 2, 4, 8, np.nan]}) @@ -9235,7 +9227,6 @@ def tz_localize( Examples -------- - Localize local times: >>> s = pd.Series([1], diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 426b3b47d9530..1d7527e73079c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1995,7 +1995,6 @@ def ngroup(self, ascending: bool = True): Examples -------- - >>> df = pd.DataFrame({"A": list("aaabba")}) >>> df A @@ -2062,7 +2061,6 @@ def cumcount(self, ascending: bool = True): Examples -------- - >>> df = pd.DataFrame([['a'], ['a'], ['a'], ['b'], ['b'], ['a']], ... columns=['A']) >>> df diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 8a42a8fa297cd..21e171f937de8 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -77,7 +77,6 @@ class Grouper: Examples -------- - Syntactic sugar for ``df.groupby('A')`` >>> df.groupby(Grouper(key='A')) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f3bae63aa7e03..3d549405592d6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1452,7 +1452,6 @@ def _get_level_values(self, level): Examples -------- - >>> idx = pd.Index(list('abc')) >>> idx Index(['a', 'b', 'c'], dtype='object') @@ -2501,7 +2500,6 @@ def union(self, other, sort=None): Examples -------- - Union matching dtypes >>> idx1 = pd.Index([1, 2, 3, 4]) @@ -2632,7 +2630,6 @@ def intersection(self, other, sort=False): Examples -------- - >>> idx1 = pd.Index([1, 2, 3, 4]) >>> idx2 = pd.Index([3, 4, 5, 6]) >>> idx1.intersection(idx2) @@ -2713,7 +2710,6 @@ def difference(self, other, sort=None): Examples -------- - >>> idx1 = pd.Index([2, 1, 3, 4]) >>> idx2 = pd.Index([3, 4, 5, 6]) >>> idx1.difference(idx2) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 6fa42804d2e39..5b357af0d3244 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1549,7 +1549,6 @@ def get_level_values(self, level): Examples -------- - Create a MultiIndex: >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def'))) @@ -1713,7 +1712,6 @@ def _sort_levels_monotonic(self): Examples -------- - >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], ... 
codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) >>> mi diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 987725bb4b70b..986f87ffe3734 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -788,7 +788,6 @@ def period_range( Examples -------- - >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M') PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', '2017-06', '2017-07', '2017-08', '2017-09', diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 4a69570f1844c..b3b2bc46f6659 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -321,7 +321,6 @@ def timedelta_range( Examples -------- - >>> pd.timedelta_range(start='1 day', periods=4) TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq='D') diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b3777e949a08c..cb8b9cc04fc24 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -48,7 +48,6 @@ class _IndexSlice: Examples -------- - >>> midx = pd.MultiIndex.from_product([['A0','A1'], ['B0','B1','B2','B3']]) >>> columns = ['foo', 'bar'] >>> dfmi = pd.DataFrame(np.arange(16).reshape((len(midx), len(columns))), @@ -124,7 +123,6 @@ def iloc(self) -> "_iLocIndexer": Examples -------- - >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4}, ... {'a': 100, 'b': 200, 'c': 300, 'd': 400}, ... {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }] diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 98910a9baf962..f19a82ab6f86a 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -550,7 +550,6 @@ def backfill(self, limit=None): Examples -------- - Resampling a Series: >>> s = pd.Series([1, 2, 3], diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 3a7e3fdab5dca..4b0fc3e47356c 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -349,7 +349,6 @@ def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True): Examples -------- - Returning a Series of booleans using only a literal pattern. 
>>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.NaN]) @@ -1274,7 +1273,6 @@ def str_findall(arr, pat, flags=0): Examples -------- - >>> s = pd.Series(['Lion', 'Monkey', 'Rabbit']) The search for the pattern 'Monkey' returns one match: @@ -1743,7 +1741,6 @@ def str_wrap(arr, width, **kwargs): Examples -------- - >>> s = pd.Series(['line to be wrapped', 'another line to be wrapped']) >>> s.str.wrap(12) 0 line to be\nwrapped diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 1d933cf431b4b..d7529ec799022 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -53,7 +53,6 @@ def to_timedelta(arg, unit="ns", errors="raise"): Examples -------- - Parsing a single string to a Timedelta: >>> pd.to_timedelta('1 days 06:05:01.00003') diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 6da8b0c5ccadd..e045d1c2211d7 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -98,7 +98,6 @@ class EWM(_Rolling): Examples -------- - >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) >>> df B diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index a0bf3376d2352..140e0144d0a2d 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -37,7 +37,6 @@ class Expanding(_Rolling_and_Expanding): Examples -------- - >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) B 0 0.0 diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index f29cd428b7bad..65ac064a1322e 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -846,7 +846,6 @@ class Window(_Window): Examples -------- - >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) >>> df B diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index eca5a3fb18e60..9c46a0036ab0d 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -462,7 +462,6 @@ def format(self, formatter, subset=None, na_rep: Optional[str] = None) -> "Style Notes ----- - ``formatter`` is either an ``a`` or a dict ``{column name: a}`` where ``a`` is one of @@ -474,7 +473,6 @@ def format(self, formatter, subset=None, na_rep: Optional[str] = None) -> "Style Examples -------- - >>> df = pd.DataFrame(np.random.randn(4, 2), columns=['a', 'b']) >>> df.style.format("{:.2%}") >>> df['c'] = ['a', 'b', 'c', 'd'] diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 39ee097bc743b..77a0c2f99496b 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -525,7 +525,6 @@ def read_json( Examples -------- - >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']], ... index=['row 1', 'row 2'], ... columns=['col 1', 'col 2']) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 08dca6b573a2f..714bebc260c06 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -61,7 +61,6 @@ def nested_to_record( Examples -------- - IN[52]: nested_to_record(dict(flat1=1,dict1=dict(c=1,d=2), nested=dict(e=dict(c=1,d=2),d=2))) Out[52]: @@ -160,7 +159,6 @@ def _json_normalize( Examples -------- - >>> from pandas.io.json import json_normalize >>> data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}}, ... 
{'name': {'given': 'Mose', 'family': 'Regner'}}, diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index a75819d33d967..1390d2d514a5e 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1830,7 +1830,6 @@ class IndexCol: Parameters ---------- - axis : axis which I reference values : the ndarray like converted values kind : a string description of this type @@ -2142,7 +2141,6 @@ class DataCol(IndexCol): Parameters ---------- - data : the actual data cname : the column name in the table to hold the data (typically values) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 1fe383706f74d..d3db539084609 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -176,13 +176,12 @@ def hist_frame( Examples -------- + This example draws a histogram based on the length and width of + some animals, displayed in three bins .. plot:: :context: close-figs - This example draws a histogram based on the length and width of - some animals, displayed in three bins - >>> df = pd.DataFrame({ ... 'length': [1.5, 0.5, 1.2, 0.9, 3], ... 'width': [0.7, 0.2, 0.15, 0.2, 1.1] diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index d7732c86911b8..dafdd6eecabc0 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -105,8 +105,8 @@ def _subplots( This utility wrapper makes it convenient to create common layouts of subplots, including the enclosing figure object, in a single call. - Keyword arguments: - + Parameters + ---------- naxes : int Number of required axes. Exceeded axes are set invisible. Default is nrows * ncols. @@ -146,16 +146,16 @@ def _subplots( Note that all keywords not recognized above will be automatically included here. - Returns: - + Returns + ------- fig, ax : tuple - fig is the Matplotlib Figure object - ax can be either a single axis object or an array of axis objects if more than one subplot was created. The dimensions of the resulting array can be controlled with the squeeze keyword, see above. - **Examples:** - + Examples + -------- x = np.linspace(0, 2*np.pi, 400) y = np.sin(x**2) diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 1369adcd80269..47a4fd8ff0e95 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -294,6 +294,7 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): Examples -------- + This example draws a basic bootstap plot for a Series. .. plot:: :context: close-figs diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 1ba1b872fa5e2..e6b147e7a4ce7 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -3,8 +3,8 @@ The JSONArray stores lists of dictionaries. The storage mechanism is a list, not an ndarray. -Note: - +Note +---- We currently store lists of UserDicts. Pandas has a few places internally that specifically check for dicts, and does non-scalar things in that case. 
We *want* the dictionaries to be treated as scalars, so we From c67407dbd602ead4b809b57ce100e3a554ef482a Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 13 Feb 2020 20:07:34 +0000 Subject: [PATCH 021/388] D409: Section underline should match the length of its name (#31958) --- pandas/core/indexes/multi.py | 2 +- pandas/tests/arithmetic/common.py | 2 +- pandas/util/_test_decorators.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 5b357af0d3244..984fe2286eb6f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1685,7 +1685,7 @@ def _lexsort_depth(self) -> int: MultiIndex that are sorted lexically Returns - ------ + ------- int """ int64_codes = [ensure_int64(level_codes) for level_codes in self.codes] diff --git a/pandas/tests/arithmetic/common.py b/pandas/tests/arithmetic/common.py index 83d19b8a20ac3..ccc49adc5da82 100644 --- a/pandas/tests/arithmetic/common.py +++ b/pandas/tests/arithmetic/common.py @@ -13,7 +13,7 @@ def assert_invalid_addsub_type(left, right, msg=None): Helper to assert that left and right can be neither added nor subtracted. Parameters - --------- + ---------- left : object right : object msg : str or None, default None diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index cd7fdd55a4d2c..25394dc6775d8 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -40,15 +40,15 @@ def test_foo(): def safe_import(mod_name: str, min_version: Optional[str] = None): """ - Parameters: - ----------- + Parameters + ---------- mod_name : str Name of the module to be imported min_version : str, default None Minimum required version of the specified mod_name - Returns: - -------- + Returns + ------- object The imported module if successful, or False """ From c8f32cb4c0f35293899843958995fcd38ed064de Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 13 Feb 2020 20:10:24 +0000 Subject: [PATCH 022/388] D411: Missing blank line before section (#31959) --- pandas/_testing.py | 1 + pandas/core/indexes/datetimelike.py | 2 ++ pandas/io/excel/_openpyxl.py | 4 ++++ pandas/io/excel/_util.py | 1 + pandas/util/_validators.py | 1 + 5 files changed, 9 insertions(+) diff --git a/pandas/_testing.py b/pandas/_testing.py index 6029fbe59bbcd..01d2bfe0458c8 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -2593,6 +2593,7 @@ def test_parallel(num_threads=2, kwargs_list=None): kwargs_list : list of dicts, optional The list of kwargs to update original function kwargs on different threads. + Notes ----- This decorator does not pass the return value of the decorated function. diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index d505778d18c52..1b3b6934aa53a 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -937,12 +937,14 @@ def _wrap_joined_index(self, joined, other): def insert(self, loc, item): """ Make new Index inserting new item at location + Parameters ---------- loc : int item : object if not either a Python datetime or a numpy integer-like, returned Index dtype will be object rather than datetime. 
+ Returns ------- new_index : Index diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index ab2d97e6026d1..d35d466e6c5c9 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -366,11 +366,13 @@ def _convert_to_number_format(cls, number_format_dict): """ Convert ``number_format_dict`` to an openpyxl v2.1.0 number format initializer. + Parameters ---------- number_format_dict : dict A dict with zero or more of the following keys. 'format_code' : str + Returns ------- number_format : str @@ -381,12 +383,14 @@ def _convert_to_number_format(cls, number_format_dict): def _convert_to_protection(cls, protection_dict): """ Convert ``protection_dict`` to an openpyxl v2 Protection object. + Parameters ---------- protection_dict : dict A dict with zero or more of the following keys. 'locked' 'hidden' + Returns ------- """ diff --git a/pandas/io/excel/_util.py b/pandas/io/excel/_util.py index 9d284c8031840..a33406b6e80d7 100644 --- a/pandas/io/excel/_util.py +++ b/pandas/io/excel/_util.py @@ -174,6 +174,7 @@ def _fill_mi_header(row, control_row): """Forward fill blank entries in row but only inside the same parent index. Used for creating headers in Multiindex. + Parameters ---------- row : list diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index a715094e65e98..bfcfd1c5a7101 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -91,6 +91,7 @@ def validate_args(fname, args, max_fname_arg_count, compat_args): arguments **positionally** internally when calling downstream implementations, a dict ensures that the original order of the keyword arguments is enforced. + Raises ------ TypeError From 870ef1eced8e7bf6eaa76837692d489b3d68c6a9 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 13 Feb 2020 20:11:14 +0000 Subject: [PATCH 023/388] CLN: D414: Section has no content (#31961) --- pandas/core/frame.py | 2 ++ pandas/core/generic.py | 3 --- pandas/core/indexes/multi.py | 3 +++ pandas/core/internals/managers.py | 4 +++- pandas/core/series.py | 2 ++ 5 files changed, 10 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2f9c8286d2988..8edda99ed6df8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3820,6 +3820,8 @@ def align( @Appender( """ + Examples + -------- >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) Change the row labels. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0ea8da0da9c6d..1a16f8792e9e7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -579,9 +579,6 @@ def set_axis(self, labels, axis=0, inplace=False): See Also -------- %(klass)s.rename_axis : Alter the name of the index%(see_also_sub)s. - - Examples - -------- """ if inplace: setattr(self, self._get_axis_name(axis), labels) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 984fe2286eb6f..02e11c0e71cbe 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2138,6 +2138,9 @@ def reorder_levels(self, order): Parameters ---------- + order : list of int or list of str + List representing new level order. Reference level by number + (position) or by key (label). 
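For reference, the pydocstyle codes being fixed across these commits are numpydoc layout checks: D409 requires a section underline exactly as long as the section name, D411 requires a blank line before each section, and D414 forbids sections with no content. A toy docstring that satisfies all three, purely illustrative and not taken from any diff:

    def reorder_demo(order):
        """
        Rearrange levels using the given input order.

        Parameters
        ----------
        order : list of int or list of str
            List representing the new level order.

        Returns
        -------
        list
            The reordered input.
        """
        return list(order)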
Returns ------- diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 37a4b43648bb1..fb20b5e89ccf3 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -101,7 +101,9 @@ class BlockManager(PandasObject): Parameters ---------- - + blocks: Sequence of Block + axes: Sequence of Index + do_integrity_check: bool, default True Notes ----- diff --git a/pandas/core/series.py b/pandas/core/series.py index 8577a7fb904dc..48fe86dd5e9c9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4006,6 +4006,8 @@ def rename( @Appender( """ + Examples + -------- >>> s = pd.Series([1, 2, 3]) >>> s 0 1 From 72bc92e7ba459557803dc479d0f527f84d7ed230 Mon Sep 17 00:00:00 2001 From: 3vts <3vts@users.noreply.github.com> Date: Thu, 13 Feb 2020 14:34:47 -0600 Subject: [PATCH 024/388] CLN: 29547 replace old string formatting 3 (#31945) --- pandas/tests/indexes/interval/test_setops.py | 4 +- pandas/tests/indexes/multi/test_compat.py | 2 +- .../tests/indexes/period/test_constructors.py | 2 +- .../indexes/timedeltas/test_constructors.py | 2 +- pandas/tests/indexing/test_floats.py | 92 +++++++++---------- 5 files changed, 50 insertions(+), 52 deletions(-) diff --git a/pandas/tests/indexes/interval/test_setops.py b/pandas/tests/indexes/interval/test_setops.py index 3246ac6bafde9..d9359d717de1d 100644 --- a/pandas/tests/indexes/interval/test_setops.py +++ b/pandas/tests/indexes/interval/test_setops.py @@ -180,8 +180,8 @@ def test_set_incompatible_types(self, closed, op_name, sort): # GH 19016: incompatible dtypes other = interval_range(Timestamp("20180101"), periods=9, closed=closed) msg = ( - "can only do {op} between two IntervalIndex objects that have " + f"can only do {op_name} between two IntervalIndex objects that have " "compatible dtypes" - ).format(op=op_name) + ) with pytest.raises(TypeError, match=msg): set_op(other, sort=sort) diff --git a/pandas/tests/indexes/multi/test_compat.py b/pandas/tests/indexes/multi/test_compat.py index 9a76f0623eb31..ef549beccda5d 100644 --- a/pandas/tests/indexes/multi/test_compat.py +++ b/pandas/tests/indexes/multi/test_compat.py @@ -29,7 +29,7 @@ def test_numeric_compat(idx): @pytest.mark.parametrize("method", ["all", "any"]) def test_logical_compat(idx, method): - msg = "cannot perform {method}".format(method=method) + msg = f"cannot perform {method}" with pytest.raises(TypeError, match=msg): getattr(idx, method)() diff --git a/pandas/tests/indexes/period/test_constructors.py b/pandas/tests/indexes/period/test_constructors.py index fcbadce3d63b1..418f53591b913 100644 --- a/pandas/tests/indexes/period/test_constructors.py +++ b/pandas/tests/indexes/period/test_constructors.py @@ -364,7 +364,7 @@ def test_constructor_year_and_quarter(self): year = pd.Series([2001, 2002, 2003]) quarter = year - 2000 idx = PeriodIndex(year=year, quarter=quarter) - strs = ["{t[0]:d}Q{t[1]:d}".format(t=t) for t in zip(quarter, year)] + strs = [f"{t[0]:d}Q{t[1]:d}" for t in zip(quarter, year)] lops = list(map(Period, strs)) p = PeriodIndex(lops) tm.assert_index_equal(p, idx) diff --git a/pandas/tests/indexes/timedeltas/test_constructors.py b/pandas/tests/indexes/timedeltas/test_constructors.py index 0de10b5d82171..8e54561df1624 100644 --- a/pandas/tests/indexes/timedeltas/test_constructors.py +++ b/pandas/tests/indexes/timedeltas/test_constructors.py @@ -155,7 +155,7 @@ def test_constructor(self): def test_constructor_iso(self): # GH #21877 expected = timedelta_range("1s", periods=9, freq="s") - durations = 
["P0DT0H0M{}S".format(i) for i in range(1, 10)] + durations = [f"P0DT0H0M{i}S" for i in range(1, 10)] result = to_timedelta(durations) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index f783c3516e357..80a4d81b20a13 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -53,8 +53,8 @@ def test_scalar_error(self, index_func): s.iloc[3.0] msg = ( - "cannot do positional indexing on {klass} with these " - r"indexers \[3\.0\] of type float".format(klass=type(i).__name__) + fr"cannot do positional indexing on {type(i).__name__} with these " + r"indexers \[3\.0\] of type float" ) with pytest.raises(TypeError, match=msg): s.iloc[3.0] = 0 @@ -95,10 +95,10 @@ def test_scalar_non_numeric(self, index_func): error = TypeError msg = ( r"cannot do (label|positional) indexing " - r"on {klass} with these indexers \[3\.0\] of " + fr"on {type(i).__name__} with these indexers \[3\.0\] of " r"type float|" "Cannot index by location index with a " - "non-integer key".format(klass=type(i).__name__) + "non-integer key" ) with pytest.raises(error, match=msg): idxr(s)[3.0] @@ -116,8 +116,8 @@ def test_scalar_non_numeric(self, index_func): error = TypeError msg = ( r"cannot do (label|positional) indexing " - r"on {klass} with these indexers \[3\.0\] of " - r"type float".format(klass=type(i).__name__) + fr"on {type(i).__name__} with these indexers \[3\.0\] of " + "type float" ) with pytest.raises(error, match=msg): s.loc[3.0] @@ -128,8 +128,8 @@ def test_scalar_non_numeric(self, index_func): # setting with a float fails with iloc msg = ( r"cannot do (label|positional) indexing " - r"on {klass} with these indexers \[3\.0\] of " - r"type float".format(klass=type(i).__name__) + fr"on {type(i).__name__} with these indexers \[3\.0\] of " + "type float" ) with pytest.raises(TypeError, match=msg): s.iloc[3.0] = 0 @@ -165,8 +165,8 @@ def test_scalar_non_numeric(self, index_func): s[3] msg = ( r"cannot do (label|positional) indexing " - r"on {klass} with these indexers \[3\.0\] of " - r"type float".format(klass=type(i).__name__) + fr"on {type(i).__name__} with these indexers \[3\.0\] of " + "type float" ) with pytest.raises(TypeError, match=msg): s[3.0] @@ -181,12 +181,10 @@ def test_scalar_with_mixed(self): for idxr in [lambda x: x, lambda x: x.iloc]: msg = ( - r"cannot do label indexing " - r"on {klass} with these indexers \[1\.0\] of " + "cannot do label indexing " + fr"on {Index.__name__} with these indexers \[1\.0\] of " r"type float|" - "Cannot index by location index with a non-integer key".format( - klass=Index.__name__ - ) + "Cannot index by location index with a non-integer key" ) with pytest.raises(TypeError, match=msg): idxr(s2)[1.0] @@ -203,9 +201,9 @@ def test_scalar_with_mixed(self): for idxr in [lambda x: x]: msg = ( - r"cannot do label indexing " - r"on {klass} with these indexers \[1\.0\] of " - r"type float".format(klass=Index.__name__) + "cannot do label indexing " + fr"on {Index.__name__} with these indexers \[1\.0\] of " + "type float" ) with pytest.raises(TypeError, match=msg): idxr(s3)[1.0] @@ -321,9 +319,9 @@ def test_scalar_float(self): s.iloc[3.0] msg = ( - r"cannot do positional indexing " - r"on {klass} with these indexers \[3\.0\] of " - r"type float".format(klass=Float64Index.__name__) + "cannot do positional indexing " + fr"on {Float64Index.__name__} with these indexers \[3\.0\] of " + "type float" ) with pytest.raises(TypeError, match=msg): s2.iloc[3.0] = 0 @@ -355,8 
+353,8 @@ def test_slice_non_numeric(self, index_func): msg = ( "cannot do positional indexing " - r"on {klass} with these indexers \[(3|4)\.0\] of " - "type float".format(klass=type(index).__name__) + fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " + "type float" ) with pytest.raises(TypeError, match=msg): s.iloc[l] @@ -365,9 +363,9 @@ def test_slice_non_numeric(self, index_func): msg = ( "cannot do (slice|positional) indexing " - r"on {klass} with these indexers " + fr"on {type(index).__name__} with these indexers " r"\[(3|4)(\.0)?\] " - r"of type (float|int)".format(klass=type(index).__name__) + r"of type (float|int)" ) with pytest.raises(TypeError, match=msg): idxr(s)[l] @@ -377,8 +375,8 @@ def test_slice_non_numeric(self, index_func): msg = ( "cannot do positional indexing " - r"on {klass} with these indexers \[(3|4)\.0\] of " - "type float".format(klass=type(index).__name__) + fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " + "type float" ) with pytest.raises(TypeError, match=msg): s.iloc[l] = 0 @@ -386,9 +384,9 @@ def test_slice_non_numeric(self, index_func): for idxr in [lambda x: x.loc, lambda x: x.iloc, lambda x: x]: msg = ( "cannot do (slice|positional) indexing " - r"on {klass} with these indexers " + fr"on {type(index).__name__} with these indexers " r"\[(3|4)(\.0)?\] " - r"of type (float|int)".format(klass=type(index).__name__) + r"of type (float|int)" ) with pytest.raises(TypeError, match=msg): idxr(s)[l] = 0 @@ -427,8 +425,8 @@ def test_slice_integer(self): # positional indexing msg = ( "cannot do slice indexing " - r"on {klass} with these indexers \[(3|4)\.0\] of " - "type float".format(klass=type(index).__name__) + fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " + "type float" ) with pytest.raises(TypeError, match=msg): s[l] @@ -451,8 +449,8 @@ def test_slice_integer(self): # positional indexing msg = ( "cannot do slice indexing " - r"on {klass} with these indexers \[-6\.0\] of " - "type float".format(klass=type(index).__name__) + fr"on {type(index).__name__} with these indexers \[-6\.0\] of " + "type float" ) with pytest.raises(TypeError, match=msg): s[slice(-6.0, 6.0)] @@ -477,8 +475,8 @@ def test_slice_integer(self): # positional indexing msg = ( "cannot do slice indexing " - r"on {klass} with these indexers \[(2|3)\.5\] of " - "type float".format(klass=type(index).__name__) + fr"on {type(index).__name__} with these indexers \[(2|3)\.5\] of " + "type float" ) with pytest.raises(TypeError, match=msg): s[l] @@ -495,8 +493,8 @@ def test_slice_integer(self): # positional indexing msg = ( "cannot do slice indexing " - r"on {klass} with these indexers \[(3|4)\.0\] of " - "type float".format(klass=type(index).__name__) + fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " + "type float" ) with pytest.raises(TypeError, match=msg): s[l] = 0 @@ -518,8 +516,8 @@ def test_integer_positional_indexing(self): klass = RangeIndex msg = ( "cannot do (slice|positional) indexing " - r"on {klass} with these indexers \[(2|4)\.0\] of " - "type float".format(klass=klass.__name__) + fr"on {klass.__name__} with these indexers \[(2|4)\.0\] of " + "type float" ) with pytest.raises(TypeError, match=msg): idxr(s)[l] @@ -546,8 +544,8 @@ def f(idxr): # positional indexing msg = ( "cannot do slice indexing " - r"on {klass} with these indexers \[(0|1)\.0\] of " - "type float".format(klass=type(index).__name__) + fr"on {type(index).__name__} with these indexers \[(0|1)\.0\] of " + "type float" ) with pytest.raises(TypeError, 
match=msg): s[l] @@ -561,8 +559,8 @@ def f(idxr): # positional indexing msg = ( "cannot do slice indexing " - r"on {klass} with these indexers \[-10\.0\] of " - "type float".format(klass=type(index).__name__) + fr"on {type(index).__name__} with these indexers \[-10\.0\] of " + "type float" ) with pytest.raises(TypeError, match=msg): s[slice(-10.0, 10.0)] @@ -580,8 +578,8 @@ def f(idxr): # positional indexing msg = ( "cannot do slice indexing " - r"on {klass} with these indexers \[0\.5\] of " - "type float".format(klass=type(index).__name__) + fr"on {type(index).__name__} with these indexers \[0\.5\] of " + "type float" ) with pytest.raises(TypeError, match=msg): s[l] @@ -597,8 +595,8 @@ def f(idxr): # positional indexing msg = ( "cannot do slice indexing " - r"on {klass} with these indexers \[(3|4)\.0\] of " - "type float".format(klass=type(index).__name__) + fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " + "type float" ) with pytest.raises(TypeError, match=msg): s[l] = 0 From 32b3d9e39f91c5172ba9b5f7cde0efcc481b0c63 Mon Sep 17 00:00:00 2001 From: 3vts <3vts@users.noreply.github.com> Date: Thu, 13 Feb 2020 19:04:46 -0600 Subject: [PATCH 025/388] CLN: 29547 replace old string formatting 4 (#31963) --- pandas/tests/internals/test_internals.py | 12 +++++------- pandas/tests/io/excel/test_readers.py | 5 +++-- pandas/tests/io/excel/test_style.py | 9 +++------ pandas/tests/io/excel/test_writers.py | 6 +++--- pandas/tests/io/excel/test_xlrd.py | 2 +- pandas/tests/io/formats/test_console.py | 4 ++-- pandas/tests/io/formats/test_to_html.py | 6 +++--- pandas/tests/io/formats/test_to_latex.py | 12 +++++------- pandas/tests/io/generate_legacy_storage_files.py | 8 ++++---- 9 files changed, 29 insertions(+), 35 deletions(-) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index fe161a0da791a..0c9ddbf5473b3 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -91,9 +91,7 @@ def create_block(typestr, placement, item_shape=None, num_offset=0): elif typestr in ("complex", "c16", "c8"): values = 1.0j * (mat.astype(typestr) + num_offset) elif typestr in ("object", "string", "O"): - values = np.reshape( - ["A{i:d}".format(i=i) for i in mat.ravel() + num_offset], shape - ) + values = np.reshape([f"A{i:d}" for i in mat.ravel() + num_offset], shape) elif typestr in ("b", "bool"): values = np.ones(shape, dtype=np.bool_) elif typestr in ("datetime", "dt", "M8[ns]"): @@ -101,7 +99,7 @@ def create_block(typestr, placement, item_shape=None, num_offset=0): elif typestr.startswith("M8[ns"): # datetime with tz m = re.search(r"M8\[ns,\s*(\w+\/?\w*)\]", typestr) - assert m is not None, "incompatible typestr -> {0}".format(typestr) + assert m is not None, f"incompatible typestr -> {typestr}" tz = m.groups()[0] assert num_items == 1, "must have only 1 num items for a tz-aware" values = DatetimeIndex(np.arange(N) * 1e9, tz=tz) @@ -607,9 +605,9 @@ def test_interleave(self): # self for dtype in ["f8", "i8", "object", "bool", "complex", "M8[ns]", "m8[ns]"]: - mgr = create_mgr("a: {0}".format(dtype)) + mgr = create_mgr(f"a: {dtype}") assert mgr.as_array().dtype == dtype - mgr = create_mgr("a: {0}; b: {0}".format(dtype)) + mgr = create_mgr(f"a: {dtype}; b: {dtype}") assert mgr.as_array().dtype == dtype # will be converted according the actual dtype of the underlying @@ -1136,7 +1134,7 @@ def __array__(self): return np.array(self.value, dtype=self.dtype) def __str__(self) -> str: - return "DummyElement({}, 
{})".format(self.value, self.dtype) + return f"DummyElement({self.value}, {self.dtype})" def __repr__(self) -> str: return str(self) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 8d00ef1b7fe3e..a59b409809eed 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -596,7 +596,8 @@ def test_read_from_file_url(self, read_ext, datapath): # fails on some systems import platform - pytest.skip("failing on {}".format(" ".join(platform.uname()).strip())) + platform_info = " ".join(platform.uname()).strip() + pytest.skip(f"failing on {platform_info}") tm.assert_frame_equal(url_table, local_table) @@ -957,7 +958,7 @@ def test_excel_passes_na_filter(self, read_ext, na_filter): def test_unexpected_kwargs_raises(self, read_ext, arg): # gh-17964 kwarg = {arg: "Sheet1"} - msg = r"unexpected keyword argument `{}`".format(arg) + msg = fr"unexpected keyword argument `{arg}`" with pd.ExcelFile("test1" + read_ext) as excel: with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/io/excel/test_style.py b/pandas/tests/io/excel/test_style.py index 88f4c3736bc0d..31b033f381f0c 100644 --- a/pandas/tests/io/excel/test_style.py +++ b/pandas/tests/io/excel/test_style.py @@ -45,10 +45,7 @@ def style(df): def assert_equal_style(cell1, cell2, engine): if engine in ["xlsxwriter", "openpyxl"]: pytest.xfail( - reason=( - "GH25351: failing on some attribute " - "comparisons in {}".format(engine) - ) + reason=(f"GH25351: failing on some attribute comparisons in {engine}") ) # XXX: should find a better way to check equality assert cell1.alignment.__dict__ == cell2.alignment.__dict__ @@ -108,7 +105,7 @@ def custom_converter(css): for col1, col2 in zip(wb["frame"].columns, wb["styled"].columns): assert len(col1) == len(col2) for cell1, cell2 in zip(col1, col2): - ref = "{cell2.column}{cell2.row:d}".format(cell2=cell2) + ref = f"{cell2.column}{cell2.row:d}" # XXX: this isn't as strong a test as ideal; we should # confirm that differences are exclusive if ref == "B2": @@ -156,7 +153,7 @@ def custom_converter(css): for col1, col2 in zip(wb["frame"].columns, wb["custom"].columns): assert len(col1) == len(col2) for cell1, cell2 in zip(col1, col2): - ref = "{cell2.column}{cell2.row:d}".format(cell2=cell2) + ref = f"{cell2.column}{cell2.row:d}" if ref in ("B2", "C3", "D4", "B5", "C6", "D7", "B8", "B9"): assert not cell1.font.bold assert cell2.font.bold diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 91665a24fc4c5..506d223dbedb4 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -41,7 +41,7 @@ def set_engine(engine, ext): which engine should be used to write Excel files. After executing the test it rolls back said change to the global option. 
""" - option_name = "io.excel.{ext}.writer".format(ext=ext.strip(".")) + option_name = f"io.excel.{ext.strip('.')}.writer" prev_engine = get_option(option_name) set_option(option_name, engine) yield @@ -1206,7 +1206,7 @@ def test_path_path_lib(self, engine, ext): writer = partial(df.to_excel, engine=engine) reader = partial(pd.read_excel, index_col=0) - result = tm.round_trip_pathlib(writer, reader, path="foo.{ext}".format(ext=ext)) + result = tm.round_trip_pathlib(writer, reader, path=f"foo.{ext}") tm.assert_frame_equal(result, df) def test_path_local_path(self, engine, ext): @@ -1214,7 +1214,7 @@ def test_path_local_path(self, engine, ext): writer = partial(df.to_excel, engine=engine) reader = partial(pd.read_excel, index_col=0) - result = tm.round_trip_pathlib(writer, reader, path="foo.{ext}".format(ext=ext)) + result = tm.round_trip_pathlib(writer, reader, path=f"foo.{ext}") tm.assert_frame_equal(result, df) def test_merged_cell_custom_objects(self, merge_cells, path): diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py index cc7e2311f362a..d456afe4ed351 100644 --- a/pandas/tests/io/excel/test_xlrd.py +++ b/pandas/tests/io/excel/test_xlrd.py @@ -37,7 +37,7 @@ def test_read_xlrd_book(read_ext, frame): # TODO: test for openpyxl as well def test_excel_table_sheet_by_index(datapath, read_ext): - path = datapath("io", "data", "excel", "test1{}".format(read_ext)) + path = datapath("io", "data", "excel", f"test1{read_ext}") with pd.ExcelFile(path) as excel: with pytest.raises(xlrd.XLRDError): pd.read_excel(excel, "asdf") diff --git a/pandas/tests/io/formats/test_console.py b/pandas/tests/io/formats/test_console.py index e56d14885f11e..b57a2393461a2 100644 --- a/pandas/tests/io/formats/test_console.py +++ b/pandas/tests/io/formats/test_console.py @@ -34,8 +34,8 @@ def test_detect_console_encoding_from_stdout_stdin(monkeypatch, empty, filled): # they have values filled. 
# GH 21552
     with monkeypatch.context() as context:
-        context.setattr("sys.{}".format(empty), MockEncoding(""))
-        context.setattr("sys.{}".format(filled), MockEncoding(filled))
+        context.setattr(f"sys.{empty}", MockEncoding(""))
+        context.setattr(f"sys.{filled}", MockEncoding(filled))
         assert detect_console_encoding() == filled
diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py
index d3f044a42eb28..9a14022d6f776 100644
--- a/pandas/tests/io/formats/test_to_html.py
+++ b/pandas/tests/io/formats/test_to_html.py
@@ -300,7 +300,7 @@ def test_to_html_border(option, result, expected):
     else:
         with option_context("display.html.border", option):
             result = result(df)
-    expected = 'border="{}"'.format(expected)
+    expected = f'border="{expected}"'
     assert expected in result


@@ -318,7 +318,7 @@ def test_to_html(biggie_df_fixture):
     assert isinstance(s, str)

     df.to_html(columns=["B", "A"], col_space=17)
-    df.to_html(columns=["B", "A"], formatters={"A": lambda x: "{x:.1f}".format(x=x)})
+    df.to_html(columns=["B", "A"], formatters={"A": lambda x: f"{x:.1f}"})

     df.to_html(columns=["B", "A"], float_format=str)
     df.to_html(columns=["B", "A"], col_space=12, float_format=str)
@@ -745,7 +745,7 @@ def test_to_html_with_col_space_units(unit):
     if isinstance(unit, int):
         unit = str(unit) + "px"
     for h in hdrs:
-        expected = '<th style="min-width: {unit};">'.format(unit=unit)
+        expected = f'<th style="min-width: {unit};">'
         assert expected in h
diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py
index bd681032f155d..c2fbc59b8f482 100644
--- a/pandas/tests/io/formats/test_to_latex.py
+++ b/pandas/tests/io/formats/test_to_latex.py
@@ -117,10 +117,10 @@ def test_to_latex_with_formatters(self):
         formatters = {
             "datetime64": lambda x: x.strftime("%Y-%m"),
-            "float": lambda x: "[{x: 4.1f}]".format(x=x),
-            "int": lambda x: "0x{x:x}".format(x=x),
-            "object": lambda x: "-{x!s}-".format(x=x),
-            "__index__": lambda x: "index: {x}".format(x=x),
+            "float": lambda x: f"[{x: 4.1f}]",
+            "int": lambda x: f"0x{x:x}",
+            "object": lambda x: f"-{x!s}-",
+            "__index__": lambda x: f"index: {x}",
         }
         result = df.to_latex(formatters=dict(formatters))

@@ -744,9 +744,7 @@ def test_to_latex_multiindex_names(self, name0, name1, axes):
         idx_names = tuple(n or "{}" for n in names)
         idx_names_row = (
-            "{idx_names[0]} & {idx_names[1]} & & & & \\\\\n".format(
-                idx_names=idx_names
-            )
+            f"{idx_names[0]} & {idx_names[1]} & & & & \\\\\n"
             if (0 in axes and any(names))
             else ""
         )
diff --git a/pandas/tests/io/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py
index f7583c93b9288..ca853ba5f00f5 100755
--- a/pandas/tests/io/generate_legacy_storage_files.py
+++ b/pandas/tests/io/generate_legacy_storage_files.py
@@ -323,17 +323,17 @@ def write_legacy_pickles(output_dir):
         "This script generates a storage file for the current arch, system, "
         "and python version"
     )
-    print(" pandas version: {0}".format(version))
-    print(" output dir : {0}".format(output_dir))
+    print(f" pandas version: {version}")
+    print(f" output dir : {output_dir}")
     print(" storage format: pickle")

-    pth = "{0}.pickle".format(platform_name())
+    pth = f"{platform_name()}.pickle"

     fh = open(os.path.join(output_dir, pth), "wb")
     pickle.dump(create_pickle_data(), fh, pickle.HIGHEST_PROTOCOL)
     fh.close()

-    print("created pickle file: {pth}".format(pth=pth))
+    print(f"created pickle file: {pth}")


 def write_legacy_file():
From 67fc9e00cc82c8928b85e364709ceaddee33c2da Mon Sep 17 00:00:00 2001
From: Terji Petersen
Date: Fri, 14 Feb 2020 01:07:53 +0000
Subject: 
[PATCH 026/388] CLN: index related attributes on Series/DataFrame (#31953) --- pandas/core/frame.py | 15 ++++++++------- pandas/core/generic.py | 22 ---------------------- pandas/core/series.py | 9 ++++++++- 3 files changed, 16 insertions(+), 30 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8edda99ed6df8..ceb455d3b0da3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8570,6 +8570,14 @@ def isin(self, values) -> "DataFrame": # ---------------------------------------------------------------------- # Add index and columns + _AXIS_ORDERS = ["index", "columns"] + _AXIS_NUMBERS = {"index": 0, "columns": 1} + _AXIS_NAMES = {0: "index", 1: "columns"} + _AXIS_REVERSED = True + _AXIS_LEN = len(_AXIS_ORDERS) + _info_axis_number = 1 + _info_axis_name = "columns" + index: "Index" = properties.AxisProperty( axis=1, doc="The index (row labels) of the DataFrame." ) @@ -8585,13 +8593,6 @@ def isin(self, values) -> "DataFrame": sparse = CachedAccessor("sparse", SparseFrameAccessor) -DataFrame._setup_axes( - ["index", "columns"], - docs={ - "index": "The index (row labels) of the DataFrame.", - "columns": "The column labels of the DataFrame.", - }, -) DataFrame._add_numeric_operations() DataFrame._add_series_or_dataframe_operations() diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1a16f8792e9e7..04e8b78fb1b87 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -315,28 +315,6 @@ def _constructor_expanddim(self): _info_axis_name: str _AXIS_LEN: int - @classmethod - def _setup_axes(cls, axes: List[str], docs: Dict[str, str]) -> None: - """ - Provide axes setup for the major PandasObjects. - - Parameters - ---------- - axes : the names of the axes in order (lowest to highest) - docs : docstrings for the axis properties - """ - info_axis = len(axes) - 1 - axes_are_reversed = len(axes) > 1 - - cls._AXIS_ORDERS = axes - cls._AXIS_NUMBERS = {a: i for i, a in enumerate(axes)} - cls._AXIS_LEN = len(axes) - cls._AXIS_NAMES = dict(enumerate(axes)) - cls._AXIS_REVERSED = axes_are_reversed - - cls._info_axis_number = info_axis - cls._info_axis_name = axes[info_axis] - def _construct_axes_dict(self, axes=None, **kwargs): """Return an axes dictionary for myself.""" d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)} diff --git a/pandas/core/series.py b/pandas/core/series.py index 48fe86dd5e9c9..5260fbe8402b6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4564,6 +4564,14 @@ def to_period(self, freq=None, copy=True) -> "Series": # ---------------------------------------------------------------------- # Add index + _AXIS_ORDERS = ["index"] + _AXIS_NUMBERS = {"index": 0} + _AXIS_NAMES = {0: "index"} + _AXIS_REVERSED = False + _AXIS_LEN = len(_AXIS_ORDERS) + _info_axis_number = 0 + _info_axis_name = "index" + index: "Index" = properties.AxisProperty( axis=0, doc="The index (axis labels) of the Series." 
) @@ -4582,7 +4590,6 @@ def to_period(self, freq=None, copy=True) -> "Series": hist = pandas.plotting.hist_series -Series._setup_axes(["index"], docs={"index": "The index (axis labels) of the Series."}) Series._add_numeric_operations() Series._add_series_or_dataframe_operations() From ff0515436c4fa02decc66be031fe708bef7b7ca5 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 14 Feb 2020 01:21:41 +0000 Subject: [PATCH 027/388] add eval examples (#31955) --- pandas/core/computation/eval.py | 15 +++++++++++++++ pandas/core/frame.py | 15 +++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 4cdf4bac61316..f6947d5ec6233 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -276,6 +276,21 @@ def eval( See the :ref:`enhancing performance ` documentation for more details. + + Examples + -------- + >>> df = pd.DataFrame({"animal": ["dog", "pig"], "age": [10, 20]}) + >>> df + animal age + 0 dog 10 + 1 pig 20 + + We can add a new column using ``pd.eval``: + + >>> pd.eval("double_age = df.age * 2", target=df) + animal age double_age + 0 dog 10 20 + 1 pig 20 40 """ inplace = validate_bool_kwarg(inplace, "inplace") diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ceb455d3b0da3..4ad80273f77ba 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3319,6 +3319,21 @@ def eval(self, expr, inplace=False, **kwargs): 2 3 6 9 3 4 4 8 4 5 2 7 + + Multiple columns can be assigned to using multi-line expressions: + + >>> df.eval( + ... ''' + ... C = A + B + ... D = A - B + ... ''' + ... ) + A B C D + 0 1 10 11 -9 + 1 2 8 10 -6 + 2 3 6 9 -3 + 3 4 4 8 0 + 4 5 2 7 3 """ from pandas.core.computation.eval import eval as _eval From 5dd27ed75ac659457bc3d737e45005478a8496aa Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 13 Feb 2020 17:27:39 -0800 Subject: [PATCH 028/388] CLN: remove unreachable in Series._reduce (#31932) --- pandas/core/series.py | 34 +++++----------------------------- 1 file changed, 5 insertions(+), 29 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 5260fbe8402b6..256586f3d36a1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -34,7 +34,6 @@ ensure_platform_int, is_bool, is_categorical_dtype, - is_datetime64_dtype, is_dict_like, is_extension_array_dtype, is_integer, @@ -42,7 +41,6 @@ is_list_like, is_object_dtype, is_scalar, - is_timedelta64_dtype, ) from pandas.core.dtypes.generic import ( ABCDataFrame, @@ -64,7 +62,7 @@ from pandas.core import algorithms, base, generic, nanops, ops from pandas.core.accessor import CachedAccessor from pandas.core.arrays import ExtensionArray, try_cast_to_ea -from pandas.core.arrays.categorical import Categorical, CategoricalAccessor +from pandas.core.arrays.categorical import CategoricalAccessor from pandas.core.arrays.sparse import SparseAccessor import pandas.core.common as com from pandas.core.construction import ( @@ -3848,21 +3846,12 @@ def _reduce( if axis is not None: self._get_axis_number(axis) - if isinstance(delegate, Categorical): - return delegate._reduce(name, skipna=skipna, **kwds) - elif isinstance(delegate, ExtensionArray): + if isinstance(delegate, ExtensionArray): # dispatch to ExtensionArray interface return delegate._reduce(name, skipna=skipna, **kwds) - elif is_datetime64_dtype(delegate): - # use DatetimeIndex implementation to handle skipna correctly - delegate = DatetimeIndex(delegate) - elif 
is_timedelta64_dtype(delegate) and hasattr(TimedeltaIndex, name): - # use TimedeltaIndex to handle skipna correctly - # TODO: remove hasattr check after TimedeltaIndex has `std` method - delegate = TimedeltaIndex(delegate) - - # dispatch to numpy arrays - elif isinstance(delegate, np.ndarray): + + else: + # dispatch to numpy arrays if numeric_only: raise NotImplementedError( f"Series.{name} does not implement numeric_only." @@ -3870,19 +3859,6 @@ def _reduce( with np.errstate(all="ignore"): return op(delegate, skipna=skipna, **kwds) - # TODO(EA) dispatch to Index - # remove once all internals extension types are - # moved to ExtensionArrays - return delegate._reduce( - op=op, - name=name, - axis=axis, - skipna=skipna, - numeric_only=numeric_only, - filter_type=filter_type, - **kwds, - ) - def _reindex_indexer(self, new_index, indexer, copy): if indexer is None: if copy: From fa2aa9f54dc125e4d9a3546f99ec5bf3e1b50e4d Mon Sep 17 00:00:00 2001 From: Martin Bjeldbak Madsen Date: Fri, 14 Feb 2020 12:35:07 +1000 Subject: [PATCH 029/388] Update documentation to use recommended library (#31968) --- pandas/io/json/_normalize.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 714bebc260c06..f158ad6cd89e3 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -159,11 +159,10 @@ def _json_normalize( Examples -------- - >>> from pandas.io.json import json_normalize >>> data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}}, ... {'name': {'given': 'Mose', 'family': 'Regner'}}, ... {'id': 2, 'name': 'Faye Raker'}] - >>> json_normalize(data) + >>> pandas.json_normalize(data) id name name.family name.first name.given name.last 0 1.0 NaN NaN Coleen NaN Volk 1 NaN NaN Regner NaN Mose NaN From bfcfaaeedfc38763517fa0acc4226a9dcad9c85c Mon Sep 17 00:00:00 2001 From: 3vts <3vts@users.noreply.github.com> Date: Fri, 14 Feb 2020 10:54:32 -0600 Subject: [PATCH 030/388] CLN: 29547 replace old string formatting 5 (#31967) --- pandas/tests/io/parser/test_c_parser_only.py | 10 +-- pandas/tests/io/parser/test_common.py | 4 +- pandas/tests/io/parser/test_compression.py | 2 +- pandas/tests/io/parser/test_encoding.py | 2 +- pandas/tests/io/parser/test_multi_thread.py | 4 +- pandas/tests/io/parser/test_na_values.py | 5 +- pandas/tests/io/parser/test_parse_dates.py | 2 +- pandas/tests/io/parser/test_read_fwf.py | 2 +- pandas/tests/io/pytables/conftest.py | 2 +- pandas/tests/io/pytables/test_store.py | 82 ++++++++------------ 10 files changed, 47 insertions(+), 68 deletions(-) diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index 1737f14e7adf9..5bbabc8e18c47 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -158,7 +158,7 @@ def test_precise_conversion(c_parser_only): # test numbers between 1 and 2 for num in np.linspace(1.0, 2.0, num=500): # 25 decimal digits of precision - text = "a\n{0:.25}".format(num) + text = f"a\n{num:.25}" normal_val = float(parser.read_csv(StringIO(text))["a"][0]) precise_val = float( @@ -170,7 +170,7 @@ def test_precise_conversion(c_parser_only): actual_val = Decimal(text[2:]) def error(val): - return abs(Decimal("{0:.100}".format(val)) - actual_val) + return abs(Decimal(f"{val:.100}") - actual_val) normal_errors.append(error(normal_val)) precise_errors.append(error(precise_val)) @@ -299,9 +299,7 @@ def test_grow_boundary_at_cap(c_parser_only): def 
test_empty_header_read(count): s = StringIO("," * count) - expected = DataFrame( - columns=["Unnamed: {i}".format(i=i) for i in range(count + 1)] - ) + expected = DataFrame(columns=[f"Unnamed: {i}" for i in range(count + 1)]) df = parser.read_csv(s) tm.assert_frame_equal(df, expected) @@ -489,7 +487,7 @@ def test_comment_whitespace_delimited(c_parser_only, capsys): captured = capsys.readouterr() # skipped lines 2, 3, 4, 9 for line_num in (2, 3, 4, 9): - assert "Skipping line {}".format(line_num) in captured.err + assert f"Skipping line {line_num}" in captured.err expected = DataFrame([[1, 2], [5, 2], [6, 2], [7, np.nan], [8, np.nan]]) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index c19056d434ec3..b3aa1aa14a509 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -957,7 +957,7 @@ def test_nonexistent_path(all_parsers): # gh-14086: raise more helpful FileNotFoundError # GH#29233 "File foo" instead of "File b'foo'" parser = all_parsers - path = "{}.csv".format(tm.rands(10)) + path = f"{tm.rands(10)}.csv" msg = f"File {path} does not exist" if parser.engine == "c" else r"\[Errno 2\]" with pytest.raises(FileNotFoundError, match=msg) as e: @@ -1872,7 +1872,7 @@ def test_internal_eof_byte_to_file(all_parsers): parser = all_parsers data = b'c1,c2\r\n"test \x1a test", test\r\n' expected = DataFrame([["test \x1a test", " test"]], columns=["c1", "c2"]) - path = "__{}__.csv".format(tm.rands(10)) + path = f"__{tm.rands(10)}__.csv" with tm.ensure_clean(path) as path: with open(path, "wb") as f: diff --git a/pandas/tests/io/parser/test_compression.py b/pandas/tests/io/parser/test_compression.py index dc03370daa1e2..b773664adda72 100644 --- a/pandas/tests/io/parser/test_compression.py +++ b/pandas/tests/io/parser/test_compression.py @@ -145,7 +145,7 @@ def test_invalid_compression(all_parsers, invalid_compression): parser = all_parsers compress_kwargs = dict(compression=invalid_compression) - msg = "Unrecognized compression type: {compression}".format(**compress_kwargs) + msg = f"Unrecognized compression type: {invalid_compression}" with pytest.raises(ValueError, match=msg): parser.read_csv("test_file.zip", **compress_kwargs) diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 13f72a0414bac..3661e4e056db2 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -45,7 +45,7 @@ def test_utf16_bom_skiprows(all_parsers, sep, encoding): 4,5,6""".replace( ",", sep ) - path = "__{}__.csv".format(tm.rands(10)) + path = f"__{tm.rands(10)}__.csv" kwargs = dict(sep=sep, skiprows=2) utf8 = "utf-8" diff --git a/pandas/tests/io/parser/test_multi_thread.py b/pandas/tests/io/parser/test_multi_thread.py index 64ccaf60ec230..458ff4da55ed3 100644 --- a/pandas/tests/io/parser/test_multi_thread.py +++ b/pandas/tests/io/parser/test_multi_thread.py @@ -41,9 +41,7 @@ def test_multi_thread_string_io_read_csv(all_parsers): num_files = 100 bytes_to_df = [ - "\n".join( - ["{i:d},{i:d},{i:d}".format(i=i) for i in range(max_row_range)] - ).encode() + "\n".join([f"{i:d},{i:d},{i:d}" for i in range(max_row_range)]).encode() for _ in range(num_files) ] files = [BytesIO(b) for b in bytes_to_df] diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py index f9a083d7f5d22..9f86bbd65640e 100644 --- a/pandas/tests/io/parser/test_na_values.py +++ 
b/pandas/tests/io/parser/test_na_values.py @@ -111,10 +111,11 @@ def f(i, v): elif i > 0: buf = "".join([","] * i) - buf = "{0}{1}".format(buf, v) + buf = f"{buf}{v}" if i < nv - 1: - buf = "{0}{1}".format(buf, "".join([","] * (nv - i - 1))) + joined = "".join([","] * (nv - i - 1)) + buf = f"{buf}{joined}" return buf diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index b01b22e811ee3..31573e4e6ecce 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1101,7 +1101,7 @@ def test_bad_date_parse(all_parsers, cache_dates, value): # if we have an invalid date make sure that we handle this with # and w/o the cache properly parser = all_parsers - s = StringIO(("{value},\n".format(value=value)) * 50000) + s = StringIO((f"{value},\n") * 50000) parser.read_csv( s, diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 27aef2376e87d..e982667f06f31 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -260,7 +260,7 @@ def test_fwf_regression(): # Turns out "T060" is parsable as a datetime slice! tz_list = [1, 10, 20, 30, 60, 80, 100] widths = [16] + [8] * len(tz_list) - names = ["SST"] + ["T{z:03d}".format(z=z) for z in tz_list[1:]] + names = ["SST"] + [f"T{z:03d}" for z in tz_list[1:]] data = """ 2009164202000 9.5403 9.4105 8.6571 7.8372 6.0612 5.8843 5.5192 2009164203000 9.5435 9.2010 8.6167 7.8176 6.0804 5.8728 5.4869 diff --git a/pandas/tests/io/pytables/conftest.py b/pandas/tests/io/pytables/conftest.py index 214f95c6fb441..38ffcb3b0e8ec 100644 --- a/pandas/tests/io/pytables/conftest.py +++ b/pandas/tests/io/pytables/conftest.py @@ -6,7 +6,7 @@ @pytest.fixture def setup_path(): """Fixture for setup path""" - return "tmp.__{}__.h5".format(tm.rands(10)) + return f"tmp.__{tm.rands(10)}__.h5" @pytest.fixture(scope="module", autouse=True) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 547de39eec5e0..fd585a73f6ce6 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -653,7 +653,7 @@ def test_getattr(self, setup_path): # not stores for x in ["mode", "path", "handle", "complib"]: - getattr(store, "_{x}".format(x=x)) + getattr(store, f"_{x}") def test_put(self, setup_path): @@ -690,9 +690,7 @@ def test_put_string_index(self, setup_path): with ensure_clean_store(setup_path) as store: - index = Index( - ["I am a very long string index: {i}".format(i=i) for i in range(20)] - ) + index = Index([f"I am a very long string index: {i}" for i in range(20)]) s = Series(np.arange(20), index=index) df = DataFrame({"A": s, "B": s}) @@ -705,7 +703,7 @@ def test_put_string_index(self, setup_path): # mixed length index = Index( ["abcdefghijklmnopqrstuvwxyz1234567890"] - + ["I am a very long string index: {i}".format(i=i) for i in range(20)] + + [f"I am a very long string index: {i}" for i in range(20)] ) s = Series(np.arange(21), index=index) df = DataFrame({"A": s, "B": s}) @@ -2044,7 +2042,7 @@ def test_unimplemented_dtypes_table_columns(self, setup_path): df = tm.makeDataFrame() df[n] = f with pytest.raises(TypeError): - store.append("df1_{n}".format(n=n), df) + store.append(f"df1_{n}", df) # frame df = tm.makeDataFrame() @@ -2689,16 +2687,12 @@ def test_select_dtypes(self, setup_path): expected = df[df.boolv == True].reindex(columns=["A", "boolv"]) # noqa for v in [True, "true", 1]: - result = store.select( - "df", "boolv 
== {v!s}".format(v=v), columns=["A", "boolv"] - ) + result = store.select("df", f"boolv == {v}", columns=["A", "boolv"]) tm.assert_frame_equal(expected, result) expected = df[df.boolv == False].reindex(columns=["A", "boolv"]) # noqa for v in [False, "false", 0]: - result = store.select( - "df", "boolv == {v!s}".format(v=v), columns=["A", "boolv"] - ) + result = store.select("df", f"boolv == {v}", columns=["A", "boolv"]) tm.assert_frame_equal(expected, result) # integer index @@ -2784,7 +2778,7 @@ def test_select_with_many_inputs(self, setup_path): users=["a"] * 50 + ["b"] * 50 + ["c"] * 100 - + ["a{i:03d}".format(i=i) for i in range(100)], + + [f"a{i:03d}" for i in range(100)], ) ) _maybe_remove(store, "df") @@ -2805,7 +2799,7 @@ def test_select_with_many_inputs(self, setup_path): tm.assert_frame_equal(expected, result) # big selector along the columns - selector = ["a", "b", "c"] + ["a{i:03d}".format(i=i) for i in range(60)] + selector = ["a", "b", "c"] + [f"a{i:03d}" for i in range(60)] result = store.select( "df", "ts>=Timestamp('2012-02-01') and users=selector" ) @@ -2914,21 +2908,19 @@ def test_select_iterator_complete_8014(self, setup_path): # select w/o iterator and where clause, single term, begin # of range, works - where = "index >= '{beg_dt}'".format(beg_dt=beg_dt) + where = f"index >= '{beg_dt}'" result = store.select("df", where=where) tm.assert_frame_equal(expected, result) # select w/o iterator and where clause, single term, end # of range, works - where = "index <= '{end_dt}'".format(end_dt=end_dt) + where = f"index <= '{end_dt}'" result = store.select("df", where=where) tm.assert_frame_equal(expected, result) # select w/o iterator and where clause, inclusive range, # works - where = "index >= '{beg_dt}' & index <= '{end_dt}'".format( - beg_dt=beg_dt, end_dt=end_dt - ) + where = f"index >= '{beg_dt}' & index <= '{end_dt}'" result = store.select("df", where=where) tm.assert_frame_equal(expected, result) @@ -2948,21 +2940,19 @@ def test_select_iterator_complete_8014(self, setup_path): tm.assert_frame_equal(expected, result) # select w/iterator and where clause, single term, begin of range - where = "index >= '{beg_dt}'".format(beg_dt=beg_dt) + where = f"index >= '{beg_dt}'" results = list(store.select("df", where=where, chunksize=chunksize)) result = concat(results) tm.assert_frame_equal(expected, result) # select w/iterator and where clause, single term, end of range - where = "index <= '{end_dt}'".format(end_dt=end_dt) + where = f"index <= '{end_dt}'" results = list(store.select("df", where=where, chunksize=chunksize)) result = concat(results) tm.assert_frame_equal(expected, result) # select w/iterator and where clause, inclusive range - where = "index >= '{beg_dt}' & index <= '{end_dt}'".format( - beg_dt=beg_dt, end_dt=end_dt - ) + where = f"index >= '{beg_dt}' & index <= '{end_dt}'" results = list(store.select("df", where=where, chunksize=chunksize)) result = concat(results) tm.assert_frame_equal(expected, result) @@ -2984,23 +2974,21 @@ def test_select_iterator_non_complete_8014(self, setup_path): end_dt = expected.index[-2] # select w/iterator and where clause, single term, begin of range - where = "index >= '{beg_dt}'".format(beg_dt=beg_dt) + where = f"index >= '{beg_dt}'" results = list(store.select("df", where=where, chunksize=chunksize)) result = concat(results) rexpected = expected[expected.index >= beg_dt] tm.assert_frame_equal(rexpected, result) # select w/iterator and where clause, single term, end of range - where = "index <= '{end_dt}'".format(end_dt=end_dt) + 
where = f"index <= '{end_dt}'" results = list(store.select("df", where=where, chunksize=chunksize)) result = concat(results) rexpected = expected[expected.index <= end_dt] tm.assert_frame_equal(rexpected, result) # select w/iterator and where clause, inclusive range - where = "index >= '{beg_dt}' & index <= '{end_dt}'".format( - beg_dt=beg_dt, end_dt=end_dt - ) + where = f"index >= '{beg_dt}' & index <= '{end_dt}'" results = list(store.select("df", where=where, chunksize=chunksize)) result = concat(results) rexpected = expected[ @@ -3018,7 +3006,7 @@ def test_select_iterator_non_complete_8014(self, setup_path): end_dt = expected.index[-1] # select w/iterator and where clause, single term, begin of range - where = "index > '{end_dt}'".format(end_dt=end_dt) + where = f"index > '{end_dt}'" results = list(store.select("df", where=where, chunksize=chunksize)) assert 0 == len(results) @@ -3040,14 +3028,14 @@ def test_select_iterator_many_empty_frames(self, setup_path): end_dt = expected.index[chunksize - 1] # select w/iterator and where clause, single term, begin of range - where = "index >= '{beg_dt}'".format(beg_dt=beg_dt) + where = f"index >= '{beg_dt}'" results = list(store.select("df", where=where, chunksize=chunksize)) result = concat(results) rexpected = expected[expected.index >= beg_dt] tm.assert_frame_equal(rexpected, result) # select w/iterator and where clause, single term, end of range - where = "index <= '{end_dt}'".format(end_dt=end_dt) + where = f"index <= '{end_dt}'" results = list(store.select("df", where=where, chunksize=chunksize)) assert len(results) == 1 @@ -3056,9 +3044,7 @@ def test_select_iterator_many_empty_frames(self, setup_path): tm.assert_frame_equal(rexpected, result) # select w/iterator and where clause, inclusive range - where = "index >= '{beg_dt}' & index <= '{end_dt}'".format( - beg_dt=beg_dt, end_dt=end_dt - ) + where = f"index >= '{beg_dt}' & index <= '{end_dt}'" results = list(store.select("df", where=where, chunksize=chunksize)) # should be 1, is 10 @@ -3076,9 +3062,7 @@ def test_select_iterator_many_empty_frames(self, setup_path): # return [] e.g. `for e in []: print True` never prints # True. 
- where = "index <= '{beg_dt}' & index >= '{end_dt}'".format( - beg_dt=beg_dt, end_dt=end_dt - ) + where = f"index <= '{beg_dt}' & index >= '{end_dt}'" results = list(store.select("df", where=where, chunksize=chunksize)) # should be [] @@ -3807,8 +3791,8 @@ def test_start_stop_fixed(self, setup_path): def test_select_filter_corner(self, setup_path): df = DataFrame(np.random.randn(50, 100)) - df.index = ["{c:3d}".format(c=c) for c in df.index] - df.columns = ["{c:3d}".format(c=c) for c in df.columns] + df.index = [f"{c:3d}" for c in df.index] + df.columns = [f"{c:3d}" for c in df.columns] with ensure_clean_store(setup_path) as store: store.put("frame", df, format="table") @@ -4259,7 +4243,7 @@ def test_append_with_diff_col_name_types_raises_value_error(self, setup_path): df5 = DataFrame({("1", 2, object): np.random.randn(10)}) with ensure_clean_store(setup_path) as store: - name = "df_{}".format(tm.rands(10)) + name = f"df_{tm.rands(10)}" store.append(name, df) for d in (df2, df3, df4, df5): @@ -4543,9 +4527,7 @@ def test_to_hdf_with_object_column_names(self, setup_path): with ensure_clean_path(setup_path) as path: with catch_warnings(record=True): df.to_hdf(path, "df", format="table", data_columns=True) - result = pd.read_hdf( - path, "df", where="index = [{0}]".format(df.index[0]) - ) + result = pd.read_hdf(path, "df", where=f"index = [{df.index[0]}]") assert len(result) def test_read_hdf_open_store(self, setup_path): @@ -4678,16 +4660,16 @@ def test_query_long_float_literal(self, setup_path): store.append("test", df, format="table", data_columns=True) cutoff = 1000000000.0006 - result = store.select("test", "A < {cutoff:.4f}".format(cutoff=cutoff)) + result = store.select("test", f"A < {cutoff:.4f}") assert result.empty cutoff = 1000000000.0010 - result = store.select("test", "A > {cutoff:.4f}".format(cutoff=cutoff)) + result = store.select("test", f"A > {cutoff:.4f}") expected = df.loc[[1, 2], :] tm.assert_frame_equal(expected, result) exact = 1000000000.0011 - result = store.select("test", "A == {exact:.4f}".format(exact=exact)) + result = store.select("test", f"A == {exact:.4f}") expected = df.loc[[1], :] tm.assert_frame_equal(expected, result) @@ -4714,21 +4696,21 @@ def test_query_compare_column_type(self, setup_path): for op in ["<", ">", "=="]: # non strings to string column always fail for v in [2.1, True, pd.Timestamp("2014-01-01"), pd.Timedelta(1, "s")]: - query = "date {op} v".format(op=op) + query = f"date {op} v" with pytest.raises(TypeError): store.select("test", where=query) # strings to other columns must be convertible to type v = "a" for col in ["int", "float", "real_date"]: - query = "{col} {op} v".format(op=op, col=col) + query = f"{col} {op} v" with pytest.raises(ValueError): store.select("test", where=query) for v, col in zip( ["1", "1.1", "2014-01-01"], ["int", "float", "real_date"] ): - query = "{col} {op} v".format(op=op, col=col) + query = f"{col} {op} v" result = store.select("test", where=query) if op == "==": From 8425c260ba964c6fd9bf9f15b02b48bda5ac1527 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Fri, 14 Feb 2020 20:45:35 +0200 Subject: [PATCH 031/388] STY: Fixed wrong placement of whitespace (#31974) --- pandas/tests/arrays/test_integer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 9186c33c12c06..0a5a2362bd290 100644 --- a/pandas/tests/arrays/test_integer.py +++ 
b/pandas/tests/arrays/test_integer.py @@ -347,10 +347,10 @@ def test_error(self, data, all_arithmetic_operators): # TODO(extension) # rpow with a datetimelike coerces the integer array incorrectly msg = ( - r"(:?can only perform ops with numeric values)" - r"|(:?cannot perform .* with this index type: DatetimeArray)" - r"|(:?Addition/subtraction of integers and integer-arrays" - r" with DatetimeArray is no longer supported. *)" + "can only perform ops with numeric values|" + "cannot perform .* with this index type: DatetimeArray|" + "Addition/subtraction of integers and integer-arrays " + "with DatetimeArray is no longer supported. *" ) with pytest.raises(TypeError, match=msg): ops(pd.Series(pd.date_range("20180101", periods=len(s)))) From 6d30046709dec6a8cc6bdefd63dd4e1b874d341c Mon Sep 17 00:00:00 2001 From: 3vts <3vts@users.noreply.github.com> Date: Fri, 14 Feb 2020 13:11:47 -0600 Subject: [PATCH 032/388] CLN: 29547 replace old string formatting 6 (#31980) --- pandas/tests/indexing/test_floats.py | 2 +- pandas/tests/io/pytables/test_timezones.py | 4 +--- pandas/tests/io/test_html.py | 4 ++-- pandas/tests/io/test_stata.py | 2 +- pandas/tests/resample/test_period_index.py | 22 ++++++++----------- pandas/tests/reshape/merge/test_join.py | 13 +++++------ pandas/tests/reshape/merge/test_merge.py | 22 ++++++++----------- pandas/tests/reshape/merge/test_merge_asof.py | 2 +- pandas/tests/reshape/test_melt.py | 8 +++---- pandas/tests/reshape/test_pivot.py | 4 ++-- .../scalar/timedelta/test_constructors.py | 2 +- 11 files changed, 36 insertions(+), 49 deletions(-) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 80a4d81b20a13..dbda3994b1c2a 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -53,7 +53,7 @@ def test_scalar_error(self, index_func): s.iloc[3.0] msg = ( - fr"cannot do positional indexing on {type(i).__name__} with these " + f"cannot do positional indexing on {type(i).__name__} with these " r"indexers \[3\.0\] of type float" ) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py index 2bf22d982e5fe..74d5a77f86827 100644 --- a/pandas/tests/io/pytables/test_timezones.py +++ b/pandas/tests/io/pytables/test_timezones.py @@ -24,9 +24,7 @@ def _compare_with_tz(a, b): a_e = a.loc[i, c] b_e = b.loc[i, c] if not (a_e == b_e and a_e.tz == b_e.tz): - raise AssertionError( - "invalid tz comparison [{a_e}] [{b_e}]".format(a_e=a_e, b_e=b_e) - ) + raise AssertionError(f"invalid tz comparison [{a_e}] [{b_e}]") def test_append_with_timezones_dateutil(setup_path): diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index b649e394c780b..cbaf16d048eda 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -40,8 +40,8 @@ def html_encoding_file(request, datapath): def assert_framelist_equal(list1, list2, *args, **kwargs): assert len(list1) == len(list2), ( "lists are not of equal size " - "len(list1) == {0}, " - "len(list2) == {1}".format(len(list1), len(list2)) + f"len(list1) == {len(list1)}, " + f"len(list2) == {len(list2)}" ) msg = "not all list elements are DataFrames" both_frames = all( diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index cb2112b481952..b65efac2bd527 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1715,7 +1715,7 @@ def test_invalid_file_not_written(self, version): "'ascii' codec can't decode byte 0xef 
in position 14: " r"ordinal not in range\(128\)" ) - with pytest.raises(UnicodeEncodeError, match=r"{}|{}".format(msg1, msg2)): + with pytest.raises(UnicodeEncodeError, match=f"{msg1}|{msg2}"): with tm.assert_produces_warning(ResourceWarning): df.to_stata(path) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index ff303b808f6f5..70b65209db955 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -96,9 +96,7 @@ def test_selection(self, index, freq, kind, kwargs): def test_annual_upsample_cases( self, targ, conv, meth, month, simple_period_range_series ): - ts = simple_period_range_series( - "1/1/1990", "12/31/1991", freq="A-{month}".format(month=month) - ) + ts = simple_period_range_series("1/1/1990", "12/31/1991", freq=f"A-{month}") result = getattr(ts.resample(targ, convention=conv), meth)() expected = result.to_timestamp(targ, how=conv) @@ -130,9 +128,9 @@ def test_not_subperiod(self, simple_period_range_series, rule, expected_error_ms # These are incompatible period rules for resampling ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="w-wed") msg = ( - "Frequency cannot be resampled to {}, as they " - "are not sub or super periods" - ).format(expected_error_msg) + "Frequency cannot be resampled to " + f"{expected_error_msg}, as they are not sub or super periods" + ) with pytest.raises(IncompatibleFrequency, match=msg): ts.resample(rule).mean() @@ -176,7 +174,7 @@ def test_annual_upsample(self, simple_period_range_series): def test_quarterly_upsample( self, month, target, convention, simple_period_range_series ): - freq = "Q-{month}".format(month=month) + freq = f"Q-{month}" ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq) result = ts.resample(target, convention=convention).ffill() expected = result.to_timestamp(target, how=convention) @@ -351,7 +349,7 @@ def test_fill_method_and_how_upsample(self): @pytest.mark.parametrize("target", ["D", "B"]) @pytest.mark.parametrize("convention", ["start", "end"]) def test_weekly_upsample(self, day, target, convention, simple_period_range_series): - freq = "W-{day}".format(day=day) + freq = f"W-{day}" ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq) result = ts.resample(target, convention=convention).ffill() expected = result.to_timestamp(target, how=convention) @@ -367,16 +365,14 @@ def test_resample_to_timestamps(self, simple_period_range_series): def test_resample_to_quarterly(self, simple_period_range_series): for month in MONTHS: - ts = simple_period_range_series( - "1990", "1992", freq="A-{month}".format(month=month) - ) - quar_ts = ts.resample("Q-{month}".format(month=month)).ffill() + ts = simple_period_range_series("1990", "1992", freq=f"A-{month}") + quar_ts = ts.resample(f"Q-{month}").ffill() stamps = ts.to_timestamp("D", how="start") qdates = period_range( ts.index[0].asfreq("D", "start"), ts.index[-1].asfreq("D", "end"), - freq="Q-{month}".format(month=month), + freq=f"Q-{month}", ) expected = stamps.reindex(qdates.to_timestamp("D", "s"), method="ffill") diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 7020d373caf82..685995ee201f8 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -262,8 +262,9 @@ def test_join_on_fails_with_wrong_object_type(self, wrong_type): # Edited test to remove the Series object from test parameters df = DataFrame({"a": [1, 1]}) - msg = "Can only merge 
Series or DataFrame objects, a {} was passed".format( - str(type(wrong_type)) + msg = ( + "Can only merge Series or DataFrame objects, " + f"a {type(wrong_type)} was passed" ) with pytest.raises(TypeError, match=msg): merge(wrong_type, df, left_on="a", right_on="a") @@ -812,9 +813,7 @@ def _check_join(left, right, result, join_col, how="left", lsuffix="_x", rsuffix except KeyError: if how in ("left", "inner"): raise AssertionError( - "key {group_key!s} should not have been in the join".format( - group_key=group_key - ) + f"key {group_key} should not have been in the join" ) _assert_all_na(l_joined, left.columns, join_col) @@ -826,9 +825,7 @@ def _check_join(left, right, result, join_col, how="left", lsuffix="_x", rsuffix except KeyError: if how in ("right", "inner"): raise AssertionError( - "key {group_key!s} should not have been in the join".format( - group_key=group_key - ) + f"key {group_key} should not have been in the join" ) _assert_all_na(r_joined, right.columns, join_col) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index fd189c7435b29..4f2cd878df613 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -710,7 +710,7 @@ def test_other_timedelta_unit(self, unit): df1 = pd.DataFrame({"entity_id": [101, 102]}) s = pd.Series([None, None], index=[101, 102], name="days") - dtype = "m8[{}]".format(unit) + dtype = f"m8[{unit}]" df2 = s.astype(dtype).to_frame("days") assert df2["days"].dtype == "m8[ns]" @@ -1012,9 +1012,9 @@ def test_indicator(self): msg = ( "Cannot use `indicator=True` option when data contains a " - "column named {}|" + f"column named {i}|" "Cannot use name of an existing column for indicator column" - ).format(i) + ) with pytest.raises(ValueError, match=msg): merge(df1, df_badcolumn, on="col1", how="outer", indicator=True) with pytest.raises(ValueError, match=msg): @@ -1555,11 +1555,9 @@ def test_merge_incompat_dtypes_error(self, df1_vals, df2_vals): df2 = DataFrame({"A": df2_vals}) msg = ( - "You are trying to merge on {lk_dtype} and " - "{rk_dtype} columns. If you wish to proceed " - "you should use pd.concat".format( - lk_dtype=df1["A"].dtype, rk_dtype=df2["A"].dtype - ) + f"You are trying to merge on {df1['A'].dtype} and " + f"{df2['A'].dtype} columns. If you wish to proceed " + "you should use pd.concat" ) msg = re.escape(msg) with pytest.raises(ValueError, match=msg): @@ -1567,11 +1565,9 @@ def test_merge_incompat_dtypes_error(self, df1_vals, df2_vals): # Check that error still raised when swapping order of dataframes msg = ( - "You are trying to merge on {lk_dtype} and " - "{rk_dtype} columns. If you wish to proceed " - "you should use pd.concat".format( - lk_dtype=df2["A"].dtype, rk_dtype=df1["A"].dtype - ) + f"You are trying to merge on {df2['A'].dtype} and " + f"{df1['A'].dtype} columns. 
If you wish to proceed " + "you should use pd.concat" ) msg = re.escape(msg) with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 9b5dea7663396..9b09f0033715d 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1196,7 +1196,7 @@ def test_merge_groupby_multiple_column_with_categorical_column(self): @pytest.mark.parametrize("side", ["left", "right"]) def test_merge_on_nans(self, func, side): # GH 23189 - msg = "Merge keys contain null values on {} side".format(side) + msg = f"Merge keys contain null values on {side} side" nulls = func([1.0, 5.0, np.nan]) non_nulls = func([1.0, 5.0, 10.0]) df_null = pd.DataFrame({"a": nulls, "left_val": ["a", "b", "c"]}) diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index 814325844cb4c..6a670e6c729e9 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -364,8 +364,8 @@ def test_pairs(self): df = DataFrame(data) spec = { - "visitdt": ["visitdt{i:d}".format(i=i) for i in range(1, 4)], - "wt": ["wt{i:d}".format(i=i) for i in range(1, 4)], + "visitdt": [f"visitdt{i:d}" for i in range(1, 4)], + "wt": [f"wt{i:d}" for i in range(1, 4)], } result = lreshape(df, spec) @@ -557,8 +557,8 @@ def test_pairs(self): result = lreshape(df, spec, dropna=False, label="foo") spec = { - "visitdt": ["visitdt{i:d}".format(i=i) for i in range(1, 3)], - "wt": ["wt{i:d}".format(i=i) for i in range(1, 4)], + "visitdt": [f"visitdt{i:d}" for i in range(1, 3)], + "wt": [f"wt{i:d}" for i in range(1, 4)], } msg = "All column lists must be same length" with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index fe75aef1ca3d7..e09a2a7907177 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1161,9 +1161,9 @@ def test_margins_no_values_two_row_two_cols(self): def test_pivot_table_with_margins_set_margin_name(self, margin_name): # see gh-3335 msg = ( - r'Conflicting name "{}" in margins|' + f'Conflicting name "{margin_name}" in margins|' "margins_name argument must be a string" - ).format(margin_name) + ) with pytest.raises(ValueError, match=msg): # multi-index index pivot_table( diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 25c9fc19981be..d32d1994cac74 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -239,7 +239,7 @@ def test_iso_constructor(fmt, exp): ], ) def test_iso_constructor_raises(fmt): - msg = "Invalid ISO 8601 Duration format - {}".format(fmt) + msg = f"Invalid ISO 8601 Duration format - {fmt}" with pytest.raises(ValueError, match=msg): Timedelta(fmt) From 00a00a1821bc6ce4f3a83168e378e88c3ebb4285 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Fri, 14 Feb 2020 21:23:52 +0200 Subject: [PATCH 033/388] CI: Removed pattern check for specific modules (#31975) --- ci/code_checks.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 7eb80077c4fab..bb7d8a388e6e2 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -259,8 +259,7 @@ fi if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then MSG='Doctests frame.py' ; echo $MSG - pytest -q --doctest-modules 
pandas/core/frame.py \ - -k" -itertuples -join -reindex -reindex_axis -round" + pytest -q --doctest-modules pandas/core/frame.py RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Doctests series.py' ; echo $MSG @@ -294,8 +293,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then MSG='Doctests interval classes' ; echo $MSG pytest -q --doctest-modules \ pandas/core/indexes/interval.py \ - pandas/core/arrays/interval.py \ - -k"-from_arrays -from_breaks -from_intervals -from_tuples -set_closed -to_tuples -interval_range" + pandas/core/arrays/interval.py RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Doctests arrays'; echo $MSG From bead1c3b6908e31f7a524e7290d7e7ca04e1c789 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 14 Feb 2020 16:05:04 -0800 Subject: [PATCH 034/388] TST: parametrize tests.indexing.test_float (#31855) --- pandas/tests/indexing/test_floats.py | 577 +++++++++++++-------------- 1 file changed, 286 insertions(+), 291 deletions(-) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index dbda3994b1c2a..4d3f1b0539aee 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -5,6 +5,16 @@ import pandas._testing as tm +def gen_obj(klass, index): + if klass is Series: + obj = Series(np.arange(len(index)), index=index) + else: + obj = DataFrame( + np.random.randn(len(index), len(index)), index=index, columns=index + ) + return obj + + class TestFloatIndexers: def check(self, result, original, indexer, getitem): """ @@ -70,97 +80,107 @@ def test_scalar_error(self, index_func): tm.makePeriodIndex, ], ) - def test_scalar_non_numeric(self, index_func): + @pytest.mark.parametrize("klass", [Series, DataFrame]) + def test_scalar_non_numeric(self, index_func, klass): # GH 4892 # float_indexers should raise exceptions # on appropriate Index types & accessors i = index_func(5) + s = gen_obj(klass, i) - for s in [ - Series(np.arange(len(i)), index=i), - DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i), - ]: - - # getting - for idxr, getitem in [(lambda x: x.iloc, False), (lambda x: x, True)]: + # getting + for idxr, getitem in [(lambda x: x.iloc, False), (lambda x: x, True)]: - # gettitem on a DataFrame is a KeyError as it is indexing - # via labels on the columns - if getitem and isinstance(s, DataFrame): - error = KeyError - msg = r"^3(\.0)?$" - else: - error = TypeError - msg = ( - r"cannot do (label|positional) indexing " - fr"on {type(i).__name__} with these indexers \[3\.0\] of " - r"type float|" - "Cannot index by location index with a " - "non-integer key" - ) - with pytest.raises(error, match=msg): - idxr(s)[3.0] - - # label based can be a TypeError or KeyError - if s.index.inferred_type in { - "categorical", - "string", - "unicode", - "mixed", - }: + # gettitem on a DataFrame is a KeyError as it is indexing + # via labels on the columns + if getitem and isinstance(s, DataFrame): error = KeyError - msg = r"^3\.0$" + msg = r"^3(\.0)?$" else: error = TypeError msg = ( r"cannot do (label|positional) indexing " fr"on {type(i).__name__} with these indexers \[3\.0\] of " - "type float" + r"type float|" + "Cannot index by location index with a " + "non-integer key" ) with pytest.raises(error, match=msg): - s.loc[3.0] - - # contains - assert 3.0 not in s - - # setting with a float fails with iloc + idxr(s)[3.0] + + # label based can be a TypeError or KeyError + if s.index.inferred_type in { + "categorical", + "string", + "unicode", + "mixed", + }: + error = KeyError + msg = r"^3\.0$" + else: + error = 
TypeError msg = ( r"cannot do (label|positional) indexing " fr"on {type(i).__name__} with these indexers \[3\.0\] of " "type float" ) - with pytest.raises(TypeError, match=msg): - s.iloc[3.0] = 0 - - # setting with an indexer - if s.index.inferred_type in ["categorical"]: - # Value or Type Error - pass - elif s.index.inferred_type in ["datetime64", "timedelta64", "period"]: - - # these should prob work - # and are inconsistent between series/dataframe ATM - # for idxr in [lambda x: x]: - # s2 = s.copy() - # - # with pytest.raises(TypeError): - # idxr(s2)[3.0] = 0 - pass + with pytest.raises(error, match=msg): + s.loc[3.0] - else: + # contains + assert 3.0 not in s + + # setting with a float fails with iloc + msg = ( + r"cannot do (label|positional) indexing " + fr"on {type(i).__name__} with these indexers \[3\.0\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s.iloc[3.0] = 0 + + # setting with an indexer + if s.index.inferred_type in ["categorical"]: + # Value or Type Error + pass + elif s.index.inferred_type in ["datetime64", "timedelta64", "period"]: + + # these should prob work + # and are inconsistent between series/dataframe ATM + # for idxr in [lambda x: x]: + # s2 = s.copy() + # + # with pytest.raises(TypeError): + # idxr(s2)[3.0] = 0 + pass + else: + + s2 = s.copy() + s2.loc[3.0] = 10 + assert s2.index.is_object() + + for idxr in [lambda x: x]: s2 = s.copy() - s2.loc[3.0] = 10 + idxr(s2)[3.0] = 0 assert s2.index.is_object() - for idxr in [lambda x: x]: - s2 = s.copy() - idxr(s2)[3.0] = 0 - assert s2.index.is_object() - + @pytest.mark.parametrize( + "index_func", + [ + tm.makeStringIndex, + tm.makeUnicodeIndex, + tm.makeCategoricalIndex, + tm.makeDateIndex, + tm.makeTimedeltaIndex, + tm.makePeriodIndex, + ], + ) + def test_scalar_non_numeric_series_fallback(self, index_func): # fallsback to position selection, series only + i = index_func(5) s = Series(np.arange(len(i)), index=i) s[3] msg = ( @@ -178,16 +198,16 @@ def test_scalar_with_mixed(self): # lookup in a pure stringstr # with an invalid indexer - for idxr in [lambda x: x, lambda x: x.iloc]: - - msg = ( - "cannot do label indexing " - fr"on {Index.__name__} with these indexers \[1\.0\] of " - r"type float|" - "Cannot index by location index with a non-integer key" - ) - with pytest.raises(TypeError, match=msg): - idxr(s2)[1.0] + msg = ( + "cannot do label indexing " + fr"on {Index.__name__} with these indexers \[1\.0\] of " + r"type float|" + "Cannot index by location index with a non-integer key" + ) + with pytest.raises(TypeError, match=msg): + s2[1.0] + with pytest.raises(TypeError, match=msg): + s2.iloc[1.0] with pytest.raises(KeyError, match=r"^1\.0$"): s2.loc[1.0] @@ -198,19 +218,17 @@ def test_scalar_with_mixed(self): # mixed index so we have label # indexing - for idxr in [lambda x: x]: - - msg = ( - "cannot do label indexing " - fr"on {Index.__name__} with these indexers \[1\.0\] of " - "type float" - ) - with pytest.raises(TypeError, match=msg): - idxr(s3)[1.0] + msg = ( + "cannot do label indexing " + fr"on {Index.__name__} with these indexers \[1\.0\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s3[1.0] - result = idxr(s3)[1] - expected = 2 - assert result == expected + result = s3[1] + expected = 2 + assert result == expected msg = "Cannot index by location index with a non-integer key" with pytest.raises(TypeError, match=msg): @@ -234,6 +252,7 @@ def test_scalar_integer(self, index_func, klass): i = index_func(5) if klass is Series: + # TODO: Should we be passing 
index=i here? obj = Series(np.arange(len(i))) else: obj = DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i) @@ -273,58 +292,54 @@ def compare(x, y): # coerce to equal int assert 3.0 in obj - def test_scalar_float(self): + @pytest.mark.parametrize("klass", [Series, DataFrame]) + def test_scalar_float(self, klass): # scalar float indexers work on a float index index = Index(np.arange(5.0)) - for s in [ - Series(np.arange(len(index)), index=index), - DataFrame( - np.random.randn(len(index), len(index)), index=index, columns=index - ), - ]: + s = gen_obj(klass, index) - # assert all operations except for iloc are ok - indexer = index[3] - for idxr, getitem in [(lambda x: x.loc, False), (lambda x: x, True)]: + # assert all operations except for iloc are ok + indexer = index[3] + for idxr, getitem in [(lambda x: x.loc, False), (lambda x: x, True)]: - # getting - result = idxr(s)[indexer] - self.check(result, s, 3, getitem) + # getting + result = idxr(s)[indexer] + self.check(result, s, 3, getitem) - # setting - s2 = s.copy() + # setting + s2 = s.copy() - result = idxr(s2)[indexer] - self.check(result, s, 3, getitem) + result = idxr(s2)[indexer] + self.check(result, s, 3, getitem) - # random integer is a KeyError - with pytest.raises(KeyError, match=r"^3\.5$"): - idxr(s)[3.5] + # random integer is a KeyError + with pytest.raises(KeyError, match=r"^3\.5$"): + idxr(s)[3.5] - # contains - assert 3.0 in s + # contains + assert 3.0 in s - # iloc succeeds with an integer - expected = s.iloc[3] - s2 = s.copy() + # iloc succeeds with an integer + expected = s.iloc[3] + s2 = s.copy() - s2.iloc[3] = expected - result = s2.iloc[3] - self.check(result, s, 3, False) + s2.iloc[3] = expected + result = s2.iloc[3] + self.check(result, s, 3, False) - # iloc raises with a float - msg = "Cannot index by location index with a non-integer key" - with pytest.raises(TypeError, match=msg): - s.iloc[3.0] + # iloc raises with a float + msg = "Cannot index by location index with a non-integer key" + with pytest.raises(TypeError, match=msg): + s.iloc[3.0] - msg = ( - "cannot do positional indexing " - fr"on {Float64Index.__name__} with these indexers \[3\.0\] of " - "type float" - ) - with pytest.raises(TypeError, match=msg): - s2.iloc[3.0] = 0 + msg = ( + "cannot do positional indexing " + fr"on {Float64Index.__name__} with these indexers \[3\.0\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s2.iloc[3.0] = 0 @pytest.mark.parametrize( "index_func", @@ -336,60 +351,54 @@ def test_scalar_float(self): tm.makePeriodIndex, ], ) - def test_slice_non_numeric(self, index_func): + @pytest.mark.parametrize("l", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) + @pytest.mark.parametrize("klass", [Series, DataFrame]) + def test_slice_non_numeric(self, index_func, l, klass): # GH 4892 # float_indexers should raise exceptions # on appropriate Index types & accessors index = index_func(5) - for s in [ - Series(range(5), index=index), - DataFrame(np.random.randn(5, 2), index=index), - ]: - - # getitem - for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: - - msg = ( - "cannot do positional indexing " - fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " - "type float" - ) - with pytest.raises(TypeError, match=msg): - s.iloc[l] + s = gen_obj(klass, index) - for idxr in [lambda x: x.loc, lambda x: x.iloc, lambda x: x]: + # getitem + msg = ( + "cannot do positional indexing " + fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " + "type float" + ) + with 
pytest.raises(TypeError, match=msg): + s.iloc[l] - msg = ( - "cannot do (slice|positional) indexing " - fr"on {type(index).__name__} with these indexers " - r"\[(3|4)(\.0)?\] " - r"of type (float|int)" - ) - with pytest.raises(TypeError, match=msg): - idxr(s)[l] + msg = ( + "cannot do (slice|positional) indexing " + fr"on {type(index).__name__} with these indexers " + r"\[(3|4)(\.0)?\] " + r"of type (float|int)" + ) + for idxr in [lambda x: x.loc, lambda x: x.iloc, lambda x: x]: + with pytest.raises(TypeError, match=msg): + idxr(s)[l] - # setitem - for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: + # setitem + msg = ( + "cannot do positional indexing " + fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s.iloc[l] = 0 - msg = ( - "cannot do positional indexing " - fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " - "type float" - ) - with pytest.raises(TypeError, match=msg): - s.iloc[l] = 0 - - for idxr in [lambda x: x.loc, lambda x: x.iloc, lambda x: x]: - msg = ( - "cannot do (slice|positional) indexing " - fr"on {type(index).__name__} with these indexers " - r"\[(3|4)(\.0)?\] " - r"of type (float|int)" - ) - with pytest.raises(TypeError, match=msg): - idxr(s)[l] = 0 + msg = ( + "cannot do (slice|positional) indexing " + fr"on {type(index).__name__} with these indexers " + r"\[(3|4)(\.0)?\] " + r"of type (float|int)" + ) + for idxr in [lambda x: x.loc, lambda x: x.iloc, lambda x: x]: + with pytest.raises(TypeError, match=msg): + idxr(s)[l] = 0 def test_slice_integer(self): @@ -409,18 +418,16 @@ def test_slice_integer(self): # getitem for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: - for idxr in [lambda x: x.loc]: - - result = idxr(s)[l] + result = s.loc[l] - # these are all label indexing - # except getitem which is positional - # empty - if oob: - indexer = slice(0, 0) - else: - indexer = slice(3, 5) - self.check(result, s, indexer, False) + # these are all label indexing + # except getitem which is positional + # empty + if oob: + indexer = slice(0, 0) + else: + indexer = slice(3, 5) + self.check(result, s, indexer, False) # positional indexing msg = ( @@ -434,17 +441,16 @@ def test_slice_integer(self): # getitem out-of-bounds for l in [slice(-6, 6), slice(-6.0, 6.0)]: - for idxr in [lambda x: x.loc]: - result = idxr(s)[l] + result = s.loc[l] - # these are all label indexing - # except getitem which is positional - # empty - if oob: - indexer = slice(0, 0) - else: - indexer = slice(-6, 6) - self.check(result, s, indexer, False) + # these are all label indexing + # except getitem which is positional + # empty + if oob: + indexer = slice(0, 0) + else: + indexer = slice(-6, 6) + self.check(result, s, indexer, False) # positional indexing msg = ( @@ -462,15 +468,13 @@ def test_slice_integer(self): (slice(2.5, 3.5), slice(3, 4)), ]: - for idxr in [lambda x: x.loc]: - - result = idxr(s)[l] - if oob: - res = slice(0, 0) - else: - res = res1 + result = s.loc[l] + if oob: + res = slice(0, 0) + else: + res = res1 - self.check(result, s, res, False) + self.check(result, s, res, False) # positional indexing msg = ( @@ -484,11 +488,10 @@ def test_slice_integer(self): # setitem for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: - for idxr in [lambda x: x.loc]: - sc = s.copy() - idxr(sc)[l] = 0 - result = idxr(sc)[l].values.ravel() - assert (result == 0).all() + sc = s.copy() + sc.loc[l] = 0 + result = sc.loc[l].values.ravel() + assert (result == 0).all() # positional indexing 
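
        # (Illustrative aside, not part of the hunk above: the refactor
        # pattern this commit applies throughout the module. A loop over
        # indexers such as
        #
        #     for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:
        #         ...
        #
        # becomes a parametrized test, so each indexer is collected, reported
        # and can fail independently. The test name below is hypothetical;
        # the real applications are in the surrounding diff:
        #
        #     @pytest.mark.parametrize(
        #         "l", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]
        #     )
        #     def test_slice_float_raises(l):
        #         ...
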
msg = ( @@ -499,7 +502,8 @@ def test_slice_integer(self): with pytest.raises(TypeError, match=msg): s[l] = 0 - def test_integer_positional_indexing(self): + @pytest.mark.parametrize("l", [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)]) + def test_integer_positional_indexing(self, l): """ make sure that we are raising on positional indexing w.r.t. an integer index """ @@ -509,18 +513,16 @@ def test_integer_positional_indexing(self): expected = s.iloc[2:4] tm.assert_series_equal(result, expected) - for idxr in [lambda x: x, lambda x: x.iloc]: - - for l in [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)]: - - klass = RangeIndex - msg = ( - "cannot do (slice|positional) indexing " - fr"on {klass.__name__} with these indexers \[(2|4)\.0\] of " - "type float" - ) - with pytest.raises(TypeError, match=msg): - idxr(s)[l] + klass = RangeIndex + msg = ( + "cannot do (slice|positional) indexing " + fr"on {klass.__name__} with these indexers \[(2|4)\.0\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[l] + with pytest.raises(TypeError, match=msg): + s.iloc[l] @pytest.mark.parametrize( "index_func", [tm.makeIntIndex, tm.makeRangeIndex], @@ -532,102 +534,95 @@ def test_slice_integer_frame_getitem(self, index_func): s = DataFrame(np.random.randn(5, 2), index=index) - def f(idxr): - - # getitem - for l in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]: - - result = idxr(s)[l] - indexer = slice(0, 2) - self.check(result, s, indexer, False) - - # positional indexing - msg = ( - "cannot do slice indexing " - fr"on {type(index).__name__} with these indexers \[(0|1)\.0\] of " - "type float" - ) - with pytest.raises(TypeError, match=msg): - s[l] - - # getitem out-of-bounds - for l in [slice(-10, 10), slice(-10.0, 10.0)]: + # getitem + for l in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]: - result = idxr(s)[l] - self.check(result, s, slice(-10, 10), True) + result = s.loc[l] + indexer = slice(0, 2) + self.check(result, s, indexer, False) # positional indexing msg = ( "cannot do slice indexing " - fr"on {type(index).__name__} with these indexers \[-10\.0\] of " + fr"on {type(index).__name__} with these indexers \[(0|1)\.0\] of " "type float" ) with pytest.raises(TypeError, match=msg): - s[slice(-10.0, 10.0)] + s[l] - # getitem odd floats - for l, res in [ - (slice(0.5, 1), slice(1, 2)), - (slice(0, 0.5), slice(0, 1)), - (slice(0.5, 1.5), slice(1, 2)), - ]: + # getitem out-of-bounds + for l in [slice(-10, 10), slice(-10.0, 10.0)]: - result = idxr(s)[l] - self.check(result, s, res, False) + result = s.loc[l] + self.check(result, s, slice(-10, 10), True) - # positional indexing - msg = ( - "cannot do slice indexing " - fr"on {type(index).__name__} with these indexers \[0\.5\] of " - "type float" - ) - with pytest.raises(TypeError, match=msg): - s[l] + # positional indexing + msg = ( + "cannot do slice indexing " + fr"on {type(index).__name__} with these indexers \[-10\.0\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[slice(-10.0, 10.0)] - # setitem - for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: + # getitem odd floats + for l, res in [ + (slice(0.5, 1), slice(1, 2)), + (slice(0, 0.5), slice(0, 1)), + (slice(0.5, 1.5), slice(1, 2)), + ]: - sc = s.copy() - idxr(sc)[l] = 0 - result = idxr(sc)[l].values.ravel() - assert (result == 0).all() + result = s.loc[l] + self.check(result, s, res, False) - # positional indexing - msg = ( - "cannot do slice indexing " - fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " - "type float" - ) - with 
pytest.raises(TypeError, match=msg): - s[l] = 0 + # positional indexing + msg = ( + "cannot do slice indexing " + fr"on {type(index).__name__} with these indexers \[0\.5\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[l] - f(lambda x: x.loc) + # setitem + for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: + + sc = s.copy() + sc.loc[l] = 0 + result = sc.loc[l].values.ravel() + assert (result == 0).all() + + # positional indexing + msg = ( + "cannot do slice indexing " + fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[l] = 0 - def test_slice_float(self): + @pytest.mark.parametrize("l", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) + @pytest.mark.parametrize("klass", [Series, DataFrame]) + def test_slice_float(self, l, klass): # same as above, but for floats index = Index(np.arange(5.0)) + 0.1 - for s in [ - Series(range(5), index=index), - DataFrame(np.random.randn(5, 2), index=index), - ]: + s = gen_obj(klass, index) - for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: + expected = s.iloc[3:4] + for idxr in [lambda x: x.loc, lambda x: x]: - expected = s.iloc[3:4] - for idxr in [lambda x: x.loc, lambda x: x]: - - # getitem - result = idxr(s)[l] - if isinstance(s, Series): - tm.assert_series_equal(result, expected) - else: - tm.assert_frame_equal(result, expected) - # setitem - s2 = s.copy() - idxr(s2)[l] = 0 - result = idxr(s2)[l].values.ravel() - assert (result == 0).all() + # getitem + result = idxr(s)[l] + if isinstance(s, Series): + tm.assert_series_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) + # setitem + s2 = s.copy() + idxr(s2)[l] = 0 + result = idxr(s2)[l].values.ravel() + assert (result == 0).all() def test_floating_index_doc_example(self): From 8f4926571b2d63908ef7a14498ccf3227ce3874c Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Sat, 15 Feb 2020 02:12:48 +0100 Subject: [PATCH 035/388] started to fixturize pandas/tests/base (#31701) --- pandas/conftest.py | 67 +++++++++++++++++++++++++++++ pandas/tests/base/test_ops.py | 30 ++++++------- pandas/tests/indexes/conftest.py | 29 ------------- pandas/tests/indexes/test_base.py | 2 +- pandas/tests/indexes/test_setops.py | 2 +- 5 files changed, 84 insertions(+), 46 deletions(-) delete mode 100644 pandas/tests/indexes/conftest.py diff --git a/pandas/conftest.py b/pandas/conftest.py index 41f77506bfbc8..7851cba9cd91a 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -17,6 +17,7 @@ from pandas import DataFrame import pandas._testing as tm from pandas.core import ops +from pandas.core.indexes.api import Index, MultiIndex hypothesis.settings.register_profile( "ci", @@ -953,3 +954,69 @@ def __len__(self): return self._data.__len__() return TestNonDictMapping + + +indices_dict = { + "unicode": tm.makeUnicodeIndex(100), + "string": tm.makeStringIndex(100), + "datetime": tm.makeDateIndex(100), + "datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"), + "period": tm.makePeriodIndex(100), + "timedelta": tm.makeTimedeltaIndex(100), + "int": tm.makeIntIndex(100), + "uint": tm.makeUIntIndex(100), + "range": tm.makeRangeIndex(100), + "float": tm.makeFloatIndex(100), + "bool": tm.makeBoolIndex(2), + "categorical": tm.makeCategoricalIndex(100), + "interval": tm.makeIntervalIndex(100), + "empty": Index([]), + "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])), + "repeats": Index([0, 0, 1, 1, 2, 2]), +} + + +@pytest.fixture(params=indices_dict.keys()) +def 
indices(request): + # copy to avoid mutation, e.g. setting .name + return indices_dict[request.param].copy() + + +def _create_series(index): + """ Helper for the _series dict """ + size = len(index) + data = np.random.randn(size) + return pd.Series(data, index=index, name="a") + + +_series = { + f"series-with-{index_id}-index": _create_series(index) + for index_id, index in indices_dict.items() +} + + +_narrow_dtypes = [ + np.float16, + np.float32, + np.int8, + np.int16, + np.int32, + np.uint8, + np.uint16, + np.uint32, +] +_narrow_series = { + f"{dtype.__name__}-series": tm.makeFloatSeries(name="a").astype(dtype) + for dtype in _narrow_dtypes +} + +_index_or_series_objs = {**indices_dict, **_series, **_narrow_series} + + +@pytest.fixture(params=_index_or_series_objs.keys()) +def index_or_series_obj(request): + """ + Fixture for tests on indexes, series and series with a narrow dtype + copy to avoid mutation, e.g. setting .name + """ + return _index_or_series_objs[request.param].copy(deep=True) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index e522c7f743a05..9deb56f070d56 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -109,26 +109,26 @@ def test_binary_ops(klass, op_name, op): assert expected_str in getattr(klass, "r" + op_name).__doc__ -class TestTranspose(Ops): +class TestTranspose: errmsg = "the 'axes' parameter is not supported" - def test_transpose(self): - for obj in self.objs: - tm.assert_equal(obj.transpose(), obj) + def test_transpose(self, index_or_series_obj): + obj = index_or_series_obj + tm.assert_equal(obj.transpose(), obj) - def test_transpose_non_default_axes(self): - for obj in self.objs: - with pytest.raises(ValueError, match=self.errmsg): - obj.transpose(1) - with pytest.raises(ValueError, match=self.errmsg): - obj.transpose(axes=1) + def test_transpose_non_default_axes(self, index_or_series_obj): + obj = index_or_series_obj + with pytest.raises(ValueError, match=self.errmsg): + obj.transpose(1) + with pytest.raises(ValueError, match=self.errmsg): + obj.transpose(axes=1) - def test_numpy_transpose(self): - for obj in self.objs: - tm.assert_equal(np.transpose(obj), obj) + def test_numpy_transpose(self, index_or_series_obj): + obj = index_or_series_obj + tm.assert_equal(np.transpose(obj), obj) - with pytest.raises(ValueError, match=self.errmsg): - np.transpose(obj, axes=1) + with pytest.raises(ValueError, match=self.errmsg): + np.transpose(obj, axes=1) class TestIndexOps(Ops): diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py deleted file mode 100644 index 57174f206b70d..0000000000000 --- a/pandas/tests/indexes/conftest.py +++ /dev/null @@ -1,29 +0,0 @@ -import pytest - -import pandas._testing as tm -from pandas.core.indexes.api import Index, MultiIndex - -indices_dict = { - "unicode": tm.makeUnicodeIndex(100), - "string": tm.makeStringIndex(100), - "datetime": tm.makeDateIndex(100), - "datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"), - "period": tm.makePeriodIndex(100), - "timedelta": tm.makeTimedeltaIndex(100), - "int": tm.makeIntIndex(100), - "uint": tm.makeUIntIndex(100), - "range": tm.makeRangeIndex(100), - "float": tm.makeFloatIndex(100), - "bool": tm.makeBoolIndex(2), - "categorical": tm.makeCategoricalIndex(100), - "interval": tm.makeIntervalIndex(100), - "empty": Index([]), - "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])), - "repeats": Index([0, 0, 1, 1, 2, 2]), -} - - -@pytest.fixture(params=indices_dict.keys()) -def indices(request): - # 
copy to avoid mutation, e.g. setting .name - return indices_dict[request.param].copy() diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 04af9b09bbf89..c64a70af6f2a4 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -34,6 +34,7 @@ period_range, ) import pandas._testing as tm +from pandas.conftest import indices_dict from pandas.core.indexes.api import ( Index, MultiIndex, @@ -42,7 +43,6 @@ ensure_index_from_sequences, ) from pandas.tests.indexes.common import Base -from pandas.tests.indexes.conftest import indices_dict class TestIndex(Base): diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index abfa413d56655..d0cbb2ab75f72 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -13,7 +13,7 @@ from pandas import Float64Index, Int64Index, RangeIndex, UInt64Index import pandas._testing as tm from pandas.api.types import pandas_dtype -from pandas.tests.indexes.conftest import indices_dict +from pandas.conftest import indices_dict COMPATIBLE_INCONSISTENT_PAIRS = { (Int64Index, RangeIndex): (tm.makeIntIndex, tm.makeRangeIndex), From 153244b32bab39b7dbd47fd2c66a02ee1f5eae46 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Fri, 14 Feb 2020 17:44:55 -0800 Subject: [PATCH 036/388] Revert 31791 (#31931) --- pandas/tests/io/test_common.py | 38 ++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 404f5a477187b..730043e6ec7d7 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -141,7 +141,24 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext): pytest.importorskip(module) path = os.path.join(HERE, "data", "does_not_exist." + fn_ext) - with tm.external_error_raised(error_class): + msg1 = r"File (b')?.+does_not_exist\.{}'? does not exist".format(fn_ext) + msg2 = fr"\[Errno 2\] No such file or directory: '.+does_not_exist\.{fn_ext}'" + msg3 = "Expected object or value" + msg4 = "path_or_buf needs to be a string file path or file-like" + msg5 = ( + fr"\[Errno 2\] File .+does_not_exist\.{fn_ext} does not exist: " + fr"'.+does_not_exist\.{fn_ext}'" + ) + msg6 = fr"\[Errno 2\] 没有那个文件或目录: '.+does_not_exist\.{fn_ext}'" + msg7 = ( + fr"\[Errno 2\] File o directory non esistente: '.+does_not_exist\.{fn_ext}'" + ) + msg8 = fr"Failed to open local file.+does_not_exist\.{fn_ext}" + + with pytest.raises( + error_class, + match=fr"({msg1}|{msg2}|{msg3}|{msg4}|{msg5}|{msg6}|{msg7}|{msg8})", + ): reader(path) @pytest.mark.parametrize( @@ -167,7 +184,24 @@ def test_read_expands_user_home_dir( path = os.path.join("~", "does_not_exist." + fn_ext) monkeypatch.setattr(icom, "_expand_user", lambda x: os.path.join("foo", x)) - with tm.external_error_raised(error_class): + msg1 = fr"File (b')?.+does_not_exist\.{fn_ext}'? 
does not exist" + msg2 = fr"\[Errno 2\] No such file or directory: '.+does_not_exist\.{fn_ext}'" + msg3 = "Unexpected character found when decoding 'false'" + msg4 = "path_or_buf needs to be a string file path or file-like" + msg5 = ( + fr"\[Errno 2\] File .+does_not_exist\.{fn_ext} does not exist: " + fr"'.+does_not_exist\.{fn_ext}'" + ) + msg6 = fr"\[Errno 2\] 没有那个文件或目录: '.+does_not_exist\.{fn_ext}'" + msg7 = ( + fr"\[Errno 2\] File o directory non esistente: '.+does_not_exist\.{fn_ext}'" + ) + msg8 = fr"Failed to open local file.+does_not_exist\.{fn_ext}" + + with pytest.raises( + error_class, + match=fr"({msg1}|{msg2}|{msg3}|{msg4}|{msg5}|{msg6}|{msg7}|{msg8})", + ): reader(path) @pytest.mark.parametrize( From 571a73bf700da705f6be8f86b29b7e0f2e747dc1 Mon Sep 17 00:00:00 2001 From: A Brooks Date: Fri, 14 Feb 2020 20:19:38 -0600 Subject: [PATCH 037/388] CLN 29547 Replace old string formatting syntax with f-strings (#31982) --- pandas/io/sas/sas_xport.py | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 461d393dc4521..e67d68f7e0975 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -79,12 +79,12 @@ Return XportReader object for reading file incrementally.""" -_read_sas_doc = """Read a SAS file into a DataFrame. +_read_sas_doc = f"""Read a SAS file into a DataFrame. -%(_base_params_doc)s -%(_format_params_doc)s -%(_params2_doc)s -%(_iterator_doc)s +{_base_params_doc} +{_format_params_doc} +{_params2_doc} +{_iterator_doc} Returns ------- @@ -102,19 +102,13 @@ >>> for chunk in itr: >>> do_something(chunk) -""" % { - "_base_params_doc": _base_params_doc, - "_format_params_doc": _format_params_doc, - "_params2_doc": _params2_doc, - "_iterator_doc": _iterator_doc, -} - +""" -_xport_reader_doc = """\ +_xport_reader_doc = f"""\ Class for reading SAS Xport files. -%(_base_params_doc)s -%(_params2_doc)s +{_base_params_doc} +{_params2_doc} Attributes ---------- @@ -122,11 +116,7 @@ Contains information about the file fields : list Contains information about the variables in the file -""" % { - "_base_params_doc": _base_params_doc, - "_params2_doc": _params2_doc, -} - +""" _read_method_doc = """\ Read observations from SAS Xport file, returning as data frame. @@ -185,7 +175,7 @@ def _handle_truncated_float_vec(vec, nbytes): if nbytes != 8: vec1 = np.zeros(len(vec), np.dtype("S8")) - dtype = np.dtype("S%d,S%d" % (nbytes, 8 - nbytes)) + dtype = np.dtype(f"S{nbytes},S{8 - nbytes}") vec2 = vec1.view(dtype=dtype) vec2["f0"] = vec return vec2 From b41911e5e894c08b60de79a527033f21858d7073 Mon Sep 17 00:00:00 2001 From: za Date: Sat, 15 Feb 2020 16:55:11 +0700 Subject: [PATCH 038/388] DOC PR09 Add missing . on freq parameter on groupby.py (#31998) --- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1d7527e73079c..6b2880810dcb2 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2329,7 +2329,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): periods : int, default 1 Number of periods to shift. freq : str, optional - Frequency string + Frequency string. axis : axis to shift, default 0 fill_value : optional From 74823a037a6acc64826f6499e3cfe27f3fd5beed Mon Sep 17 00:00:00 2001 From: za Date: Sat, 15 Feb 2020 17:28:54 +0700 Subject: [PATCH 039/388] DOC PR09 Add . 
in the description parameter (#32001)

---
 pandas/_testing.py            | 2 +-
 pandas/core/indexers.py       | 2 +-
 pandas/core/indexes/period.py | 2 +-
 pandas/io/formats/style.py    | 4 ++--
 pandas/io/sql.py              | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/pandas/_testing.py b/pandas/_testing.py
index 01d2bfe0458c8..0b3004ce12013 100644
--- a/pandas/_testing.py
+++ b/pandas/_testing.py
@@ -1014,7 +1014,7 @@ def assert_extension_array_equal(
     Parameters
     ----------
     left, right : ExtensionArray
-        The two arrays to compare
+        The two arrays to compare.
     check_dtype : bool, default True
         Whether to check if the ExtensionArray dtypes are identical.
     check_less_precise : bool or int, default False

diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py
index fe475527f4596..cadae9da6092f 100644
--- a/pandas/core/indexers.py
+++ b/pandas/core/indexers.py
@@ -296,7 +296,7 @@ def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any:
     indexer : array-like or list-like
         The array-like that's used to index. List-like input that is not yet
         a numpy array or an ExtensionArray is converted to one. Other input
-        types are passed through as is
+        types are passed through as is.

     Returns
     -------

diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
index 986f87ffe3734..0b85433b699a8 100644
--- a/pandas/core/indexes/period.py
+++ b/pandas/core/indexes/period.py
@@ -85,7 +85,7 @@ class PeriodIndex(DatetimeIndexOpsMixin, Int64Index):
     copy : bool
         Make a copy of input ndarray.
     freq : str or period object, optional
-        One of pandas period strings or corresponding objects
+        One of pandas period strings or corresponding objects.
     year : int, array, or Series, default None
     month : int, array, or Series, default None
     quarter : int, array, or Series, default None

diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
index 9c46a0036ab0d..d0c64d54f30d6 100644
--- a/pandas/io/formats/style.py
+++ b/pandas/io/formats/style.py
@@ -85,7 +85,7 @@ class Styler:
         number and ``<num_col>`` is the column number.
     na_rep : str, optional
         Representation for missing values.
-        If ``na_rep`` is None, no special formatting is applied
+        If ``na_rep`` is None, no special formatting is applied.

         .. versionadded:: 1.0.0

@@ -446,7 +446,7 @@ def format(self, formatter, subset=None, na_rep: Optional[str] = None) -> "Style
     Parameters
     ----------
     formatter : str, callable, dict or None
-        If ``formatter`` is None, the default formatter is used
+        If ``formatter`` is None, the default formatter is used.
     subset : IndexSlice
         An argument to ``DataFrame.loc`` that restricts which elements
         ``formatter`` is applied to.

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 69e5a973ff706..e8666c495d39a 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -356,7 +356,7 @@ def read_sql(
     sql : str or SQLAlchemy Selectable (select or text object)
         SQL query to be executed or a table name.
     con : SQLAlchemy connectable (engine/connection) or database str URI
-        or DBAPI2 connection (fallback mode)'
+        or DBAPI2 connection (fallback mode).

         Using SQLAlchemy makes it possible to use any DB supported by that
         library. If a DBAPI2 object, only sqlite3 is supported. The user is responsible

From e99db38878e45ef9c9dac4f5b3aef22c7216b829 Mon Sep 17 00:00:00 2001
From: Achmad Syarif Hidayatullah <30652154+asyarif93@users.noreply.github.com>
Date: Sat, 15 Feb 2020 17:58:12 +0700
Subject: [PATCH 040/388] DOC PR09 Add missing . on Parameter con description
 (#32000)
---
 pandas/core/generic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 04e8b78fb1b87..1139d22d53e7d 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2415,7 +2415,7 @@ def to_sql(
             library. Legacy support is provided for sqlite3.Connection objects. The user
             is responsible for engine disposal and connection closure for the SQLAlchemy
             connectable See `here \
-                <https://docs.sqlalchemy.org/en/13/core/connections.html>`_
+                <https://docs.sqlalchemy.org/en/13/core/connections.html>`_.

         schema : str, optional
             Specify the schema (if database flavor supports this). If None, use

From eb97073805ad54e95767613804a62b5e6ad5f1f9 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com>
Date: Sat, 15 Feb 2020 15:14:11 +0200
Subject: [PATCH 041/388] CI: temporary fix to the CI (#32011)

---
 pandas/tests/scalar/timedelta/test_arithmetic.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py
index 5fc991df49424..60e278f47d0f8 100644
--- a/pandas/tests/scalar/timedelta/test_arithmetic.py
+++ b/pandas/tests/scalar/timedelta/test_arithmetic.py
@@ -384,8 +384,11 @@ def test_td_div_nan(self, nan):
         result = td / nan
         assert result is NaT

-        result = td // nan
-        assert result is NaT
+        # TODO: Don't leave commented, this is just a temporary fix for
+        # https://github.com/pandas-dev/pandas/issues/31992
+
+        # result = td // nan
+        # assert result is NaT

     # ---------------------------------------------------------------
     # Timedelta.__rdiv__

From 2aa9cb9703a94e92f453ad88a91cdf28fe027a1f Mon Sep 17 00:00:00 2001
From: dequadras
Date: Sat, 15 Feb 2020 18:07:32 +0000
Subject: [PATCH 042/388] DOC: update ohlc docstring so that it reflects the
 real use #31919 (#31941)

---
 pandas/core/groupby/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 6b2880810dcb2..cc46485b4a2e8 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1447,7 +1447,7 @@ def last(x):
     @Appender(_common_see_also)
     def ohlc(self) -> DataFrame:
         """
-        Compute open, high, low and close values of a group, excluding missing values.
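
A minimal sketch of the behavior the corrected summary describes (the frame,
key and price names below are invented for illustration, not taken from the
patch):

import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "a", "b", "b"],
                   "price": [10, 12, 9, 5, 7]})

# One row per group: the first, highest, lowest and last value in each group.
print(df.groupby("key")["price"].ohlc())
# roughly:
#      open  high  low  close
# key
# a      10    12    9      9
# b       5     7    5      7
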
For multiple groupings, the result index will be a MultiIndex From 53ece7009c02eefc0befccf911d72369b0ed37f8 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 15 Feb 2020 18:34:58 +0000 Subject: [PATCH 043/388] CI: silence numpy-dev failures (#32025) --- pandas/__init__.py | 1 + pandas/tests/api/test_api.py | 1 + pandas/tests/frame/test_cumulative.py | 9 ++++++- pandas/tests/groupby/test_function.py | 4 ++++ pandas/tests/groupby/test_transform.py | 3 +++ .../tests/scalar/timedelta/test_arithmetic.py | 24 +++++++++++++------ pandas/tests/series/test_cumulative.py | 7 ++++++ 7 files changed, 41 insertions(+), 8 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index d526531b159b2..2d3d3f7d92a9c 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -25,6 +25,7 @@ _np_version_under1p16, _np_version_under1p17, _np_version_under1p18, + _is_numpy_dev, ) try: diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 406d5f055797d..5aab5b814bae7 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -198,6 +198,7 @@ class TestPDApi(Base): "_np_version_under1p16", "_np_version_under1p17", "_np_version_under1p18", + "_is_numpy_dev", "_testing", "_tslib", "_typing", diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py index b545d6aa8afd3..2466547e2948b 100644 --- a/pandas/tests/frame/test_cumulative.py +++ b/pandas/tests/frame/test_cumulative.py @@ -7,8 +7,9 @@ """ import numpy as np +import pytest -from pandas import DataFrame, Series +from pandas import DataFrame, Series, _is_numpy_dev import pandas._testing as tm @@ -73,6 +74,9 @@ def test_cumprod(self, datetime_frame): df.cumprod(0) df.cumprod(1) + @pytest.mark.xfail( + _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" + ) def test_cummin(self, datetime_frame): datetime_frame.loc[5:10, 0] = np.nan datetime_frame.loc[10:15, 1] = np.nan @@ -96,6 +100,9 @@ def test_cummin(self, datetime_frame): cummin_xs = datetime_frame.cummin(axis=1) assert np.shape(cummin_xs) == np.shape(datetime_frame) + @pytest.mark.xfail( + _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" + ) def test_cummax(self, datetime_frame): datetime_frame.loc[5:10, 0] = np.nan datetime_frame.loc[10:15, 1] = np.nan diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 73e36cb5e6c84..8830b84a52421 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -17,6 +17,7 @@ NaT, Series, Timestamp, + _is_numpy_dev, date_range, isna, ) @@ -685,6 +686,9 @@ def test_numpy_compat(func): getattr(g, func)(foo=1) +@pytest.mark.xfail( + _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" +) def test_cummin_cummax(): # GH 15048 num_types = [np.int32, np.int64, np.float32, np.float64] diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 0ad829dd4de7a..740103eec185a 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -15,6 +15,7 @@ MultiIndex, Series, Timestamp, + _is_numpy_dev, concat, date_range, ) @@ -329,6 +330,8 @@ def test_transform_transformation_func(transformation_func): if transformation_func in ["pad", "backfill", "tshift", "corrwith", "cumcount"]: # These transformation functions are not yet covered in this test pytest.xfail("See GH 31269 and GH 31270") + elif _is_numpy_dev and transformation_func in ["cummin"]: + 
pytest.xfail("https://github.com/pandas-dev/pandas/issues/31992") elif transformation_func == "fillna": test_op = lambda x: x.transform("fillna", value=0) mock_op = lambda x: x.fillna(value=0) diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 60e278f47d0f8..f0ad5fa70471b 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -8,7 +8,7 @@ import pytest import pandas as pd -from pandas import NaT, Timedelta, Timestamp, offsets +from pandas import NaT, Timedelta, Timestamp, _is_numpy_dev, offsets import pandas._testing as tm from pandas.core import ops @@ -377,18 +377,28 @@ def test_td_div_numeric_scalar(self): assert isinstance(result, Timedelta) assert result == Timedelta(days=2) - @pytest.mark.parametrize("nan", [np.nan, np.float64("NaN"), float("nan")]) + @pytest.mark.parametrize( + "nan", + [ + np.nan, + pytest.param( + np.float64("NaN"), + marks=pytest.mark.xfail( + _is_numpy_dev, + reason="https://github.com/pandas-dev/pandas/issues/31992", + ), + ), + float("nan"), + ], + ) def test_td_div_nan(self, nan): # np.float64('NaN') has a 'dtype' attr, avoid treating as array td = Timedelta(10, unit="d") result = td / nan assert result is NaT - # TODO: Don't leave commented, this is just a temporary fix for - # https://github.com/pandas-dev/pandas/issues/31992 - - # result = td // nan - # assert result is NaT + result = td // nan + assert result is NaT # --------------------------------------------------------------- # Timedelta.__rdiv__ diff --git a/pandas/tests/series/test_cumulative.py b/pandas/tests/series/test_cumulative.py index 885b5bf0476f2..b0065992b850a 100644 --- a/pandas/tests/series/test_cumulative.py +++ b/pandas/tests/series/test_cumulative.py @@ -11,6 +11,7 @@ import pytest import pandas as pd +from pandas import _is_numpy_dev import pandas._testing as tm @@ -37,6 +38,9 @@ def test_cumsum(self, datetime_series): def test_cumprod(self, datetime_series): _check_accum_op("cumprod", datetime_series) + @pytest.mark.xfail( + _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" + ) def test_cummin(self, datetime_series): tm.assert_numpy_array_equal( datetime_series.cummin().values, @@ -49,6 +53,9 @@ def test_cummin(self, datetime_series): tm.assert_series_equal(result, expected) + @pytest.mark.xfail( + _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" + ) def test_cummax(self, datetime_series): tm.assert_numpy_array_equal( datetime_series.cummax().values, From cc4c0b3809af739caba021f96b59511d6c2fd9c5 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 15 Feb 2020 19:03:20 +0000 Subject: [PATCH 044/388] CLN: D213: Multi-line docstring summary should start at the second line (#31893) --- pandas/_config/config.py | 9 +++-- pandas/_testing.py | 33 ++++++++++----- pandas/compat/chainmap.py | 3 +- pandas/core/aggregation.py | 3 +- pandas/core/arrays/base.py | 9 +++-- pandas/core/arrays/categorical.py | 3 +- pandas/core/arrays/datetimelike.py | 3 +- pandas/core/arrays/integer.py | 6 ++- pandas/core/arrays/interval.py | 3 +- pandas/core/arrays/sparse/scipy_sparse.py | 6 ++- pandas/core/computation/ops.py | 12 ++++-- pandas/core/computation/parsing.py | 3 +- pandas/core/computation/pytables.py | 6 ++- pandas/core/computation/scope.py | 3 +- pandas/core/dtypes/cast.py | 9 +++-- pandas/core/dtypes/concat.py | 3 +- pandas/core/generic.py | 37 +++++++++++------ 
pandas/core/indexes/category.py | 3 +- pandas/core/indexing.py | 3 +- pandas/core/internals/blocks.py | 42 +++++++++++++------- pandas/core/internals/construction.py | 3 +- pandas/core/internals/managers.py | 6 ++- pandas/core/nanops.py | 9 +++-- pandas/core/reshape/tile.py | 3 +- pandas/io/common.py | 8 ++-- pandas/io/excel/_base.py | 6 ++- pandas/io/excel/_odfreader.py | 9 +++-- pandas/io/excel/_openpyxl.py | 3 +- pandas/io/excel/_util.py | 3 +- pandas/io/excel/_xlrd.py | 3 +- pandas/io/excel/_xlwt.py | 3 +- pandas/io/formats/excel.py | 3 +- pandas/io/formats/format.py | 6 ++- pandas/io/html.py | 3 +- pandas/io/pytables.py | 15 ++++--- pandas/io/sql.py | 12 ++++-- pandas/io/stata.py | 18 ++++++--- pandas/plotting/_matplotlib/tools.py | 3 +- pandas/tests/extension/arrow/arrays.py | 3 +- pandas/tests/extension/base/__init__.py | 3 +- pandas/tests/extension/base/ops.py | 3 +- pandas/tests/extension/conftest.py | 15 ++++--- pandas/tests/extension/json/array.py | 3 +- pandas/tests/groupby/conftest.py | 3 +- pandas/tests/indexing/common.py | 3 +- pandas/tests/indexing/multiindex/conftest.py | 3 +- pandas/tests/io/conftest.py | 3 +- pandas/tests/io/pytables/common.py | 3 +- pandas/tests/resample/conftest.py | 24 +++++++---- pandas/util/_validators.py | 6 ++- 50 files changed, 258 insertions(+), 129 deletions(-) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 2df940817498c..f1959cd70ed3a 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -82,7 +82,8 @@ class OptionError(AttributeError, KeyError): - """Exception for pandas.options, backwards compatible with KeyError + """ + Exception for pandas.options, backwards compatible with KeyError checks """ @@ -545,7 +546,8 @@ def deprecate_option( def _select_options(pat: str) -> List[str]: - """returns a list of keys matching `pat` + """ + returns a list of keys matching `pat` if pat=="all", returns all registered options """ @@ -708,7 +710,8 @@ def pp(name: str, ks: Iterable[str]) -> List[str]: @contextmanager def config_prefix(prefix): - """contextmanager for multiple invocations of API with a common prefix + """ + contextmanager for multiple invocations of API with a common prefix supported API functions: (register / get / set )__option diff --git a/pandas/_testing.py b/pandas/_testing.py index 0b3004ce12013..b19905f1c3b5d 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -743,7 +743,8 @@ def repr_class(x): def assert_attr_equal(attr, left, right, obj="Attributes"): - """checks attributes are equal. Both objects must have attribute. + """ + checks attributes are equal. Both objects must have attribute. Parameters ---------- @@ -820,7 +821,8 @@ def assert_is_sorted(seq): def assert_categorical_equal( left, right, check_dtype=True, check_category_order=True, obj="Categorical" ): - """Test that Categoricals are equivalent. + """ + Test that Categoricals are equivalent. Parameters ---------- @@ -860,7 +862,8 @@ def assert_categorical_equal( def assert_interval_array_equal(left, right, exact="equiv", obj="IntervalArray"): - """Test that two IntervalArrays are equivalent. + """ + Test that two IntervalArrays are equivalent. Parameters ---------- @@ -1009,7 +1012,8 @@ def _raise(left, right, err_msg): def assert_extension_array_equal( left, right, check_dtype=True, check_less_precise=False, check_exact=False ): - """Check that left and right ExtensionArrays are equal. + """ + Check that left and right ExtensionArrays are equal. 
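
For context, a minimal sketch of how this helper is typically called; the
arrays below are invented for the example, only the helper itself comes from
pandas:

import pandas as pd
import pandas._testing as tm

left = pd.array([1, 2, None], dtype="Int64")
right = pd.array([1, 2, None], dtype="Int64")

tm.assert_extension_array_equal(left, right)          # passes silently
# tm.assert_extension_array_equal(left, right[::-1])  # would raise AssertionError
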
Parameters ---------- @@ -1489,7 +1493,8 @@ def assert_sp_array_equal( check_fill_value=True, consolidate_block_indices=False, ): - """Check that the left and right SparseArray are equal. + """ + Check that the left and right SparseArray are equal. Parameters ---------- @@ -1724,7 +1729,8 @@ def _make_timeseries(start="2000-01-01", end="2000-12-31", freq="1D", seed=None) def all_index_generator(k=10): - """Generator which can be iterated over to get instances of all the various + """ + Generator which can be iterated over to get instances of all the various index classes. Parameters @@ -1763,7 +1769,8 @@ def index_subclass_makers_generator(): def all_timeseries_index_generator(k=10): - """Generator which can be iterated over to get instances of all the classes + """ + Generator which can be iterated over to get instances of all the classes which represent time-series. Parameters @@ -1854,7 +1861,8 @@ def makePeriodFrame(nper=None): def makeCustomIndex( nentries, nlevels, prefix="#", names=False, ndupe_l=None, idx_type=None ): - """Create an index/multindex with given dimensions, levels, names, etc' + """ + Create an index/multindex with given dimensions, levels, names, etc' nentries - number of entries in index nlevels - number of levels (> 1 produces multindex) @@ -2144,7 +2152,8 @@ def makeMissingDataframe(density=0.9, random_state=None): def optional_args(decorator): - """allows a decorator to take optional positional and keyword arguments. + """ + allows a decorator to take optional positional and keyword arguments. Assumes that taking a single, callable, positional argument means that it is decorating a function, i.e. something like this:: @@ -2216,7 +2225,8 @@ def _get_default_network_errors(): def can_connect(url, error_classes=None): - """Try to connect to the given url. True if succeeds, False if IOError + """ + Try to connect to the given url. True if succeeds, False if IOError raised Parameters @@ -2584,7 +2594,8 @@ def use_numexpr(use, min_elements=None): def test_parallel(num_threads=2, kwargs_list=None): - """Decorator to run the same function multiple times in parallel. + """ + Decorator to run the same function multiple times in parallel. Parameters ---------- diff --git a/pandas/compat/chainmap.py b/pandas/compat/chainmap.py index 588bd24ddf797..a84dbb4a661e4 100644 --- a/pandas/compat/chainmap.py +++ b/pandas/compat/chainmap.py @@ -5,7 +5,8 @@ class DeepChainMap(ChainMap[_KT, _VT]): - """Variant of ChainMap that allows direct updates to inner scopes. + """ + Variant of ChainMap that allows direct updates to inner scopes. Only works when all passed mapping are mutable. """ diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 79b87f146b9a7..448f84d58d7a0 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -98,7 +98,8 @@ def normalize_keyword_aggregation(kwargs: dict) -> Tuple[dict, List[str], List[i def _make_unique_kwarg_list( seq: Sequence[Tuple[Any, Any]] ) -> Sequence[Tuple[Any, Any]]: - """Uniquify aggfunc name of the pairs in the order list + """ + Uniquify aggfunc name of the pairs in the order list Examples: -------- diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index c3c91cea43f6b..b5da6d4c11616 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1,4 +1,5 @@ -"""An interface for extending pandas with custom arrays. +""" +An interface for extending pandas with custom arrays. .. 
warning:: @@ -213,7 +214,8 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): @classmethod def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): - """Construct a new ExtensionArray from a sequence of strings. + """ + Construct a new ExtensionArray from a sequence of strings. .. versionadded:: 0.24.0 @@ -961,7 +963,8 @@ def __repr__(self) -> str: return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}" def _formatter(self, boxed: bool = False) -> Callable[[Any], Optional[str]]: - """Formatting function for scalar values. + """ + Formatting function for scalar values. This is used in the default '__repr__'. The returned formatting function receives instances of your scalar type. diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 6c7c35e9b4763..19602010fd882 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1891,7 +1891,8 @@ def __contains__(self, key) -> bool: return contains(self, key, container=self._codes) def _tidy_repr(self, max_vals=10, footer=True) -> str: - """ a short repr displaying only max_vals and an optional (but default + """ + a short repr displaying only max_vals and an optional (but default footer) """ num = max_vals // 2 diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 07aa8d49338c8..e39d1dc03adf5 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -134,7 +134,8 @@ def _simple_new(cls, values, **kwargs): @property def _scalar_type(self) -> Type[DatetimeLikeScalar]: - """The scalar associated with this datelike + """ + The scalar associated with this datelike * PeriodArray : Period * DatetimeArray : Timestamp diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 642ae6d4deacb..f1e0882def13b 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -477,7 +477,8 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: @property def _ndarray_values(self) -> np.ndarray: - """Internal pandas method for lossy conversion to a NumPy ndarray. + """ + Internal pandas method for lossy conversion to a NumPy ndarray. This method is not part of the pandas interface. @@ -492,7 +493,8 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, float]: return self.to_numpy(na_value=np.nan), np.nan def _values_for_argsort(self) -> np.ndarray: - """Return values for sorting. + """ + Return values for sorting. Returns ------- diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index c8bb0878b564d..ab3ee5bbcdc3a 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -460,7 +460,8 @@ def from_tuples(cls, data, closed="right", copy=False, dtype=None): return cls.from_arrays(left, right, closed, copy=False, dtype=dtype) def _validate(self): - """Verify that the IntervalArray is valid. + """ + Verify that the IntervalArray is valid. 
Checks that diff --git a/pandas/core/arrays/sparse/scipy_sparse.py b/pandas/core/arrays/sparse/scipy_sparse.py index eff9c03386a38..e77256a5aaadd 100644 --- a/pandas/core/arrays/sparse/scipy_sparse.py +++ b/pandas/core/arrays/sparse/scipy_sparse.py @@ -17,7 +17,8 @@ def _check_is_partition(parts, whole): def _to_ijv(ss, row_levels=(0,), column_levels=(1,), sort_labels=False): - """ For arbitrary (MultiIndexed) sparse Series return + """ + For arbitrary (MultiIndexed) sparse Series return (v, i, j, ilabels, jlabels) where (v, (i, j)) is suitable for passing to scipy.sparse.coo constructor. """ @@ -44,7 +45,8 @@ def get_indexers(levels): # labels_to_i[:] = np.arange(labels_to_i.shape[0]) def _get_label_to_i_dict(labels, sort_labels=False): - """ Return dict of unique labels to number. + """ + Return dict of unique labels to number. Optionally sort by label. """ labels = Index(map(tuple, labels)).unique().tolist() # squish diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index 5563d3ae27118..7ed089b283903 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -1,4 +1,5 @@ -"""Operator classes for eval. +""" +Operator classes for eval. """ from datetime import datetime @@ -248,7 +249,8 @@ def is_datetime(self) -> bool: def _in(x, y): - """Compute the vectorized membership of ``x in y`` if possible, otherwise + """ + Compute the vectorized membership of ``x in y`` if possible, otherwise use Python. """ try: @@ -263,7 +265,8 @@ def _in(x, y): def _not_in(x, y): - """Compute the vectorized membership of ``x not in y`` if possible, + """ + Compute the vectorized membership of ``x not in y`` if possible, otherwise use Python. """ try: @@ -445,7 +448,8 @@ def evaluate(self, env, engine: str, parser, term_type, eval_in_python): return term_type(name, env=env) def convert_values(self): - """Convert datetimes to a comparable value in an expression. + """ + Convert datetimes to a comparable value in an expression. """ def stringify(value): diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py index ce213c8532834..92a2c20cd2a9e 100644 --- a/pandas/core/computation/parsing.py +++ b/pandas/core/computation/parsing.py @@ -1,4 +1,5 @@ -""":func:`~pandas.eval` source string parsing functions +""" +:func:`~pandas.eval` source string parsing functions """ from io import StringIO diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 097c3c22aa6c3..828ec11c2bd38 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -149,7 +149,8 @@ def is_valid(self) -> bool: @property def is_in_table(self) -> bool: - """ return True if this is a valid column name for generation (e.g. an + """ + return True if this is a valid column name for generation (e.g. an actual column in the table) """ return self.queryables.get(self.lhs) is not None @@ -175,7 +176,8 @@ def generate(self, v) -> str: return f"({self.lhs} {self.op} {val})" def convert_value(self, v) -> "TermValue": - """ convert the expression that is in the term to something that is + """ + convert the expression that is in the term to something that is accepted by pytables """ diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index 70dcf4defdb52..937c81fdeb8d6 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -31,7 +31,8 @@ def ensure_scope( def _replacer(x) -> str: - """Replace a number with its hexadecimal representation. 
Used to tag + """ + Replace a number with its hexadecimal representation. Used to tag temporary variables with their calling scope's id. """ # get the hex repr of the binary char and remove 0x and pad by pad_size diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 011c09c9ca1ef..1c969d40c2c7f 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -105,7 +105,8 @@ def is_nested_object(obj) -> bool: def maybe_downcast_to_dtype(result, dtype): - """ try to cast to the specified dtype (e.g. convert back to bool/int + """ + try to cast to the specified dtype (e.g. convert back to bool/int or could be an astype of float64->float32 """ do_round = False @@ -750,7 +751,8 @@ def maybe_upcast(values, fill_value=np.nan, dtype=None, copy: bool = False): def invalidate_string_dtypes(dtype_set): - """Change string like dtypes to object for + """ + Change string like dtypes to object for ``DataFrame.select_dtypes()``. """ non_string_dtypes = dtype_set - {np.dtype("S").type, np.dtype(" bool: def concat_categorical(to_concat, axis: int = 0): - """Concatenate an object/categorical array of arrays, each of which is a + """ + Concatenate an object/categorical array of arrays, each of which is a single dtype Parameters diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1139d22d53e7d..ad177518ba4b3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -281,21 +281,24 @@ def _validate_dtype(self, dtype): @property def _constructor(self: FrameOrSeries) -> Type[FrameOrSeries]: - """Used when a manipulation result has the same dimensions as the + """ + Used when a manipulation result has the same dimensions as the original. """ raise AbstractMethodError(self) @property def _constructor_sliced(self): - """Used when a manipulation result has one lower dimension(s) as the + """ + Used when a manipulation result has one lower dimension(s) as the original, such as DataFrame single columns slicing. """ raise AbstractMethodError(self) @property def _constructor_expanddim(self): - """Used when a manipulation result has one higher dimension as the + """ + Used when a manipulation result has one higher dimension as the original, such as Series.to_frame() """ raise NotImplementedError @@ -324,7 +327,8 @@ def _construct_axes_dict(self, axes=None, **kwargs): def _construct_axes_from_arguments( self, args, kwargs, require_all: bool = False, sentinel=None ): - """Construct and returns axes if supplied in args/kwargs. + """ + Construct and returns axes if supplied in args/kwargs. If require_all, raise if all axis arguments are not supplied return a tuple of (axes, kwargs). @@ -1713,7 +1717,8 @@ def keys(self): return self._info_axis def items(self): - """Iterate over (label, values) on info axis + """ + Iterate over (label, values) on info axis This is index for Series and columns for DataFrame. @@ -3093,18 +3098,22 @@ def to_csv( # Lookup Caching def _set_as_cached(self, item, cacher) -> None: - """Set the _cacher attribute on the calling object with a weakref to + """ + Set the _cacher attribute on the calling object with a weakref to cacher. """ self._cacher = (item, weakref.ref(cacher)) def _reset_cacher(self) -> None: - """Reset the cacher.""" + """ + Reset the cacher. + """ if hasattr(self, "_cacher"): del self._cacher def _maybe_cache_changed(self, item, value) -> None: - """The object has called back to us saying maybe it has changed. + """ + The object has called back to us saying maybe it has changed. 
""" self._data.set(item, value) @@ -5051,7 +5060,8 @@ def __finalize__( return self def __getattr__(self, name: str): - """After regular attribute access, try looking up the name + """ + After regular attribute access, try looking up the name This allows simpler access to columns for interactive use. """ # Note: obj.x will always call obj.__getattribute__('x') prior to @@ -5069,7 +5079,8 @@ def __getattr__(self, name: str): return object.__getattribute__(self, name) def __setattr__(self, name: str, value) -> None: - """After regular attribute access, try setting the name + """ + After regular attribute access, try setting the name This allows simpler access to columns for interactive use. """ # first try regular attribute access via __getattribute__, so that @@ -5109,7 +5120,8 @@ def __setattr__(self, name: str, value) -> None: object.__setattr__(self, name, value) def _dir_additions(self): - """ add the string-like attributes from the info_axis. + """ + add the string-like attributes from the info_axis. If info_axis is a MultiIndex, it's first level values are used. """ additions = { @@ -5123,7 +5135,8 @@ def _dir_additions(self): # Consolidation of internals def _protect_consolidate(self, f): - """Consolidate _data -- if the blocks have changed, then clear the + """ + Consolidate _data -- if the blocks have changed, then clear the cache """ blocks_before = len(self._data.blocks) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index bb62d500311df..adb2ed9211bfe 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -519,7 +519,8 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): return new_target, indexer def _reindex_non_unique(self, target): - """ reindex from a non-unique; which CategoricalIndex's are almost + """ + reindex from a non-unique; which CategoricalIndex's are almost always """ new_target, indexer = self.reindex(target) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index cb8b9cc04fc24..46017377f2b9c 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -85,7 +85,8 @@ class IndexingError(Exception): class IndexingMixin: - """Mixin for adding .loc/.iloc/.at/.iat to Datafames and Series. + """ + Mixin for adding .loc/.iloc/.at/.iat to Datafames and Series. """ @property diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 7d6ef11719b3a..34fa4c0e6544e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -256,7 +256,8 @@ def mgr_locs(self, new_mgr_locs): @property def array_dtype(self): - """ the dtype to return if I want to construct this block as an + """ + the dtype to return if I want to construct this block as an array """ return self.dtype @@ -374,7 +375,8 @@ def delete(self, loc): self.mgr_locs = self.mgr_locs.delete(loc) def apply(self, func, **kwargs) -> List["Block"]: - """ apply the function to my values; return a block if we are not + """ + apply the function to my values; return a block if we are not one """ with np.errstate(all="ignore"): @@ -400,7 +402,8 @@ def _split_op_result(self, result) -> List["Block"]: return [result] def fillna(self, value, limit=None, inplace=False, downcast=None): - """ fillna on the block with the value. If we fail, then convert to + """ + fillna on the block with the value. 
If we fail, then convert to ObjectBlock and try again """ inplace = validate_bool_kwarg(inplace, "inplace") @@ -648,7 +651,8 @@ def convert( timedelta: bool = True, coerce: bool = False, ): - """ attempt to coerce any object types to better types return a copy + """ + attempt to coerce any object types to better types return a copy of the block (if copy = True) by definition we are not an ObjectBlock here! """ @@ -693,7 +697,8 @@ def copy(self, deep=True): def replace( self, to_replace, value, inplace=False, filter=None, regex=False, convert=True ): - """replace the to_replace value with value, possible to create new + """ + replace the to_replace value with value, possible to create new blocks here this is just a call to putmask. regex is not used here. It is used in ObjectBlocks. It is here for API compatibility. """ @@ -913,7 +918,8 @@ def setitem(self, indexer, value): return block def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False): - """ putmask the data to the block; it is possible that we may create a + """ + putmask the data to the block; it is possible that we may create a new dtype of block return the resulting block(s) @@ -1446,7 +1452,8 @@ def equals(self, other) -> bool: return array_equivalent(self.values, other.values) def _unstack(self, unstacker_func, new_columns, n_rows, fill_value): - """Return a list of unstacked blocks of self + """ + Return a list of unstacked blocks of self Parameters ---------- @@ -1584,7 +1591,8 @@ class NonConsolidatableMixIn: _validate_ndim = False def __init__(self, values, placement, ndim=None): - """Initialize a non-consolidatable block. + """ + Initialize a non-consolidatable block. 'ndim' may be inferred from 'placement'. @@ -1699,7 +1707,8 @@ def _get_unstack_items(self, unstacker, new_columns): class ExtensionBlock(NonConsolidatableMixIn, Block): - """Block for holding extension types. + """ + Block for holding extension types. Notes ----- @@ -1757,7 +1766,8 @@ def is_numeric(self): return self.values.dtype._is_numeric def setitem(self, indexer, value): - """Set the value inplace, returning a same-typed block. + """ + Set the value inplace, returning a same-typed block. This differs from Block.setitem by not allowing setitem to change the dtype of the Block. @@ -2291,7 +2301,8 @@ def _holder(self): return DatetimeArray def _maybe_coerce_values(self, values): - """Input validation for values passed to __init__. Ensure that + """ + Input validation for values passed to __init__. Ensure that we have datetime64TZ, coercing if necessary. Parameters @@ -2580,7 +2591,8 @@ def __init__(self, values, placement=None, ndim=2): @property def is_bool(self): - """ we can be a bool if we have only bool values but are of type + """ + we can be a bool if we have only bool values but are of type object """ return lib.is_bool_array(self.values.ravel()) @@ -2593,7 +2605,8 @@ def convert( timedelta: bool = True, coerce: bool = False, ): - """ attempt to coerce any object types to better types return a copy of + """ + attempt to coerce any object types to better types return a copy of the block (if copy = True) by definition we ARE an ObjectBlock!!!!! can return multiple blocks! 
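For orientation amid these hunks: the rule this commit enforces is pydocstyle D213, which requires the summary of a multi-line docstring to start on the line after the opening quotes rather than on the same line. A minimal sketch of the two shapes, using a hypothetical helper rather than a function from this patch:

def helper():
    """Summary on the opening line,
    continued here (flagged by D213).
    """

def helper():
    """
    Summary moved to the second line
    (accepted by D213).
    """

The reflow only moves text within the string literal, so runtime behavior is unchanged apart from the leading newline and indentation the second form adds to helper.__doc__.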
@@ -2886,7 +2899,8 @@ def _holder(self): @property def array_dtype(self): - """ the dtype to return if I want to construct this block as an + """ + the dtype to return if I want to construct this block as an array """ return np.object_ diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 9dd4312a39525..57ed2555761be 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -534,7 +534,8 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): - """Convert list of dicts to numpy arrays + """ + Convert list of dicts to numpy arrays if `columns` is not passed, column names are inferred from the records - for OrderedDict and dicts, the column names match diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index fb20b5e89ccf3..69ceb95985140 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1756,7 +1756,8 @@ def form_blocks(arrays, names, axes): def _simple_blockify(tuples, dtype): - """ return a single array of a block that has a single dtype; if dtype is + """ + return a single array of a block that has a single dtype; if dtype is not None, coerce to this dtype """ values, placement = _stack_arrays(tuples, dtype) @@ -1815,7 +1816,8 @@ def _shape_compat(x): def _interleaved_dtype( blocks: List[Block], ) -> Optional[Union[np.dtype, ExtensionDtype]]: - """Find the common dtype for `blocks`. + """ + Find the common dtype for `blocks`. Parameters ---------- diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 6115c4af41b25..a5c609473760d 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -652,7 +652,8 @@ def _get_counts_nanvar( ddof: int, dtype: Dtype = float, ) -> Tuple[Union[int, np.ndarray], Union[int, np.ndarray]]: - """ Get the count of non-null values along an axis, accounting + """ + Get the count of non-null values along an axis, accounting for degrees of freedom. Parameters @@ -956,7 +957,8 @@ def nanskew( skipna: bool = True, mask: Optional[np.ndarray] = None, ) -> float: - """ Compute the sample skewness. + """ + Compute the sample skewness. The statistic computed here is the adjusted Fisher-Pearson standardized moment coefficient G1. The algorithm computes this coefficient directly @@ -1194,7 +1196,8 @@ def _get_counts( axis: Optional[int], dtype: Dtype = float, ) -> Union[int, np.ndarray]: - """ Get the count of non-null values along an axis + """ + Get the count of non-null values along an axis Parameters ---------- diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index e499158a13b0c..86417faf6cd11 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -587,7 +587,8 @@ def _round_frac(x, precision: int): def _infer_precision(base_precision: int, bins) -> int: - """Infer an appropriate precision for _round_frac + """ + Infer an appropriate precision for _round_frac """ for precision in range(base_precision, 20): levels = [_round_frac(b, precision) for b in bins] diff --git a/pandas/io/common.py b/pandas/io/common.py index beb6c9d97aff3..c52583eed27ec 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -74,8 +74,9 @@ def is_url(url) -> bool: def _expand_user( filepath_or_buffer: FilePathOrBuffer[AnyStr], ) -> FilePathOrBuffer[AnyStr]: - """Return the argument with an initial component of ~ or ~user - replaced by that user's home directory. 
+ """ + Return the argument with an initial component of ~ or ~user + replaced by that user's home directory. Parameters ---------- @@ -103,7 +104,8 @@ def validate_header_arg(header) -> None: def stringify_path( filepath_or_buffer: FilePathOrBuffer[AnyStr], ) -> FilePathOrBuffer[AnyStr]: - """Attempt to convert a path-like object to a string. + """ + Attempt to convert a path-like object to a string. Parameters ---------- diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 70c09151258ff..97959bd125113 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -721,7 +721,8 @@ def _get_sheet_name(self, sheet_name): return sheet_name def _value_with_fmt(self, val): - """Convert numpy types to Python types for the Excel writers. + """ + Convert numpy types to Python types for the Excel writers. Parameters ---------- @@ -755,7 +756,8 @@ def _value_with_fmt(self, val): @classmethod def check_extension(cls, ext): - """checks that path's extension against the Writer's supported + """ + checks that path's extension against the Writer's supported extensions. If it isn't supported, raises UnsupportedFiletypeError. """ if ext.startswith("."): diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index ec5f6fcb17ff8..7af776dc1a10f 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -64,7 +64,8 @@ def get_sheet_by_name(self, name: str): raise ValueError(f"sheet {name} not found") def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: - """Parse an ODF Table into a list of lists + """ + Parse an ODF Table into a list of lists """ from odf.table import CoveredTableCell, TableCell, TableRow @@ -120,7 +121,8 @@ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: return table def _get_row_repeat(self, row) -> int: - """Return number of times this row was repeated + """ + Return number of times this row was repeated Repeating an empty row appeared to be a common way of representing sparse rows in the table. """ @@ -134,7 +136,8 @@ def _get_column_repeat(self, cell) -> int: return int(cell.attributes.get((TABLENS, "number-columns-repeated"), 1)) def _is_empty_row(self, row) -> bool: - """Helper function to find empty rows + """ + Helper function to find empty rows """ for column in row.childNodes: if len(column.childNodes) > 0: diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index d35d466e6c5c9..a96c0f814e2d8 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -468,7 +468,8 @@ def write_cells( class _OpenpyxlReader(_BaseExcelReader): def __init__(self, filepath_or_buffer: FilePathOrBuffer) -> None: - """Reader using openpyxl engine. + """ + Reader using openpyxl engine. Parameters ---------- diff --git a/pandas/io/excel/_util.py b/pandas/io/excel/_util.py index a33406b6e80d7..c8d40d7141fc8 100644 --- a/pandas/io/excel/_util.py +++ b/pandas/io/excel/_util.py @@ -171,7 +171,8 @@ def _trim_excel_header(row): def _fill_mi_header(row, control_row): - """Forward fill blank entries in row but only inside the same parent index. + """ + Forward fill blank entries in row but only inside the same parent index. Used for creating headers in Multiindex. diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index 16f800a6de2c9..8f7d3b1368fc7 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -9,7 +9,8 @@ class _XlrdReader(_BaseExcelReader): def __init__(self, filepath_or_buffer): - """Reader using xlrd engine. 
+ """ + Reader using xlrd engine. Parameters ---------- diff --git a/pandas/io/excel/_xlwt.py b/pandas/io/excel/_xlwt.py index d102a885cef0a..78efe77e9fe2d 100644 --- a/pandas/io/excel/_xlwt.py +++ b/pandas/io/excel/_xlwt.py @@ -80,7 +80,8 @@ def write_cells( def _style_to_xlwt( cls, item, firstlevel: bool = True, field_sep=",", line_sep=";" ) -> str: - """helper which recursively generate an xlwt easy style string + """ + helper which recursively generate an xlwt easy style string for example: hstyle = {"font": {"bold": True}, diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 28a069bc9fc1b..aac1df5dcd396 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -41,7 +41,8 @@ def __init__( class CSSToExcelConverter: - """A callable for converting CSS declarations to ExcelWriter styles + """ + A callable for converting CSS declarations to ExcelWriter styles Supports parts of CSS 2.2, with minimal CSS 3.0 support (e.g. text-shadow), focusing on font styling, backgrounds, borders and alignment. diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 55d534f975b68..0693c083c9ddc 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1832,7 +1832,8 @@ def __init__(self, accuracy: Optional[int] = None, use_eng_prefix: bool = False) self.use_eng_prefix = use_eng_prefix def __call__(self, num: Union[int, float]) -> str: - """ Formats a number in engineering notation, appending a letter + """ + Formats a number in engineering notation, appending a letter representing the power of 1000 of the original number. Some examples: >>> format_eng(0) # for self.accuracy = 0 @@ -1930,7 +1931,8 @@ def _binify(cols: List[int], line_width: int) -> List[int]: def get_level_lengths( levels: Any, sentinel: Union[bool, object, str] = "" ) -> List[Dict[int, int]]: - """For each index in each level the function returns lengths of indexes. + """ + For each index in each level the function returns lengths of indexes. Parameters ---------- diff --git a/pandas/io/html.py b/pandas/io/html.py index ee8e96b4b3344..561570f466b68 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -600,7 +600,8 @@ def _build_doc(self): def _build_xpath_expr(attrs) -> str: - """Build an xpath expression to simulate bs4's ability to pass in kwargs to + """ + Build an xpath expression to simulate bs4's ability to pass in kwargs to search for attributes when using the lxml parser. 
Parameters diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1390d2d514a5e..048aa8b1915d1 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3224,7 +3224,8 @@ def is_multi_index(self) -> bool: return isinstance(self.levels, list) def validate_multiindex(self, obj): - """validate that we can store the multi-index; reset and return the + """ + validate that we can store the multi-index; reset and return the new object """ levels = [ @@ -3375,7 +3376,8 @@ def validate_version(self, where=None): warnings.warn(ws, IncompatibilityWarning) def validate_min_itemsize(self, min_itemsize): - """validate the min_itemsize doesn't contain items that are not in the + """ + validate the min_itemsize doesn't contain items that are not in the axes this needs data_columns to be defined """ if min_itemsize is None: @@ -3587,7 +3589,8 @@ def get_object(cls, obj, transposed: bool): return obj def validate_data_columns(self, data_columns, min_itemsize, non_index_axes): - """take the input data_columns and min_itemize and create a data + """ + take the input data_columns and min_itemize and create a data columns spec """ if not len(non_index_axes): @@ -3999,7 +4002,8 @@ def create_description( def read_coordinates( self, where=None, start: Optional[int] = None, stop: Optional[int] = None, ): - """select coordinates (row numbers) from a table; return the + """ + select coordinates (row numbers) from a table; return the coordinates object """ # validate the version @@ -4028,7 +4032,8 @@ def read_column( start: Optional[int] = None, stop: Optional[int] = None, ): - """return a single column from the table, generally only indexables + """ + return a single column from the table, generally only indexables are interesting """ # validate the version diff --git a/pandas/io/sql.py b/pandas/io/sql.py index e8666c495d39a..0eb7deb5574ec 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -653,7 +653,8 @@ def create(self): self._execute_create() def _execute_insert(self, conn, keys, data_iter): - """Execute SQL statement inserting data + """ + Execute SQL statement inserting data Parameters ---------- @@ -667,7 +668,8 @@ def _execute_insert(self, conn, keys, data_iter): conn.execute(self.table.insert(), data) def _execute_insert_multi(self, conn, keys, data_iter): - """Alternative to _execute_insert for DBs support multivalue INSERT. + """ + Alternative to _execute_insert for DBs support multivalue INSERT. Note: multi-value insert is usually faster for analytics DBs and tables containing a few columns @@ -1092,7 +1094,8 @@ def read_table( schema=None, chunksize=None, ): - """Read SQL database table into a DataFrame. + """ + Read SQL database table into a DataFrame. Parameters ---------- @@ -1168,7 +1171,8 @@ def read_query( params=None, chunksize=None, ): - """Read SQL query into a DataFrame. + """ + Read SQL query into a DataFrame. Parameters ---------- diff --git a/pandas/io/stata.py b/pandas/io/stata.py index cf3251faae979..593228e99477b 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -482,7 +482,8 @@ class InvalidColumnName(Warning): def _cast_to_stata_types(data: DataFrame) -> DataFrame: - """Checks the dtypes of the columns of a pandas DataFrame for + """ + Checks the dtypes of the columns of a pandas DataFrame for compatibility with the data types and ranges supported by Stata, and converts if necessary. 
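For context on the io/stata.py hunks around here: numeric columns in a Stata file must be one of int8, int16, int32, float32 or float64, so _cast_to_stata_types converts anything else to a supported type before writing. A minimal sketch of the observable effect, assuming an illustrative file name:

import pandas as pd

df = pd.DataFrame({"flag": [True, False, True]})
# bool is not a Stata dtype, so it is cast to int8 on write
df.to_stata("flags.dta")
print(pd.read_stata("flags.dta")["flag"].tolist())  # [1, 0, 1]

Unsigned integer columns are handled similarly, being widened to a signed type large enough for the values, which is why the docstring above says the conversion happens "if necessary".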
@@ -2128,7 +2129,8 @@ def _write_bytes(self, value: bytes) -> None: self._file.write(value) def _prepare_categoricals(self, data: DataFrame) -> DataFrame: - """Check for categorical columns, retain categorical information for + """ + Check for categorical columns, retain categorical information for Stata file and convert categorical data to int """ is_cat = [is_categorical_dtype(data[col]) for col in data] @@ -2170,7 +2172,8 @@ def _prepare_categoricals(self, data: DataFrame) -> DataFrame: def _replace_nans(self, data: DataFrame) -> DataFrame: # return data - """Checks floating point data columns for nans, and replaces these with + """ + Checks floating point data columns for nans, and replaces these with the generic Stata for missing value (.) """ for c in data: @@ -3035,7 +3038,8 @@ def _write_header( self._write_bytes(self._tag(bio.read(), "header")) def _write_map(self) -> None: - """Called twice during file write. The first populates the values in + """ + Called twice during file write. The first populates the values in the map with 0s. The second call writes the final map locations when all blocks have been written. """ @@ -3185,7 +3189,8 @@ def _write_file_close_tag(self) -> None: self._update_map("end-of-file") def _update_strl_names(self) -> None: - """Update column names for conversion to strl if they might have been + """ + Update column names for conversion to strl if they might have been changed to comply with Stata naming rules """ # Update convert_strl if names changed @@ -3195,7 +3200,8 @@ def _update_strl_names(self) -> None: self._convert_strl[idx] = new def _convert_strls(self, data: DataFrame) -> DataFrame: - """Convert columns to StrLs if either very large or in the + """ + Convert columns to StrLs if either very large or in the convert_strl variable """ convert_cols = [ diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index dafdd6eecabc0..5743288982da4 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -100,7 +100,8 @@ def _subplots( layout_type="box", **fig_kw, ): - """Create a figure with a set of subplots already made. + """ + Create a figure with a set of subplots already made. This utility wrapper makes it convenient to create common layouts of subplots, including the enclosing figure object, in a single call. diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py index b67ca4cfab83d..cd4b43c83340f 100644 --- a/pandas/tests/extension/arrow/arrays.py +++ b/pandas/tests/extension/arrow/arrays.py @@ -1,4 +1,5 @@ -"""Rudimentary Apache Arrow-backed ExtensionArray. +""" +Rudimentary Apache Arrow-backed ExtensionArray. At the moment, just a boolean array / type is implemented. Eventually, we'll want to parametrize the type and support diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py index e2b6ea0304f6a..323cb843b2d74 100644 --- a/pandas/tests/extension/base/__init__.py +++ b/pandas/tests/extension/base/__init__.py @@ -1,4 +1,5 @@ -"""Base test suite for extension arrays. +""" +Base test suite for extension arrays. These tests are intended for third-party libraries to subclass to validate that their extension arrays and dtypes satisfy the interface. 
Moving or diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 0609f19c8e0c3..4009041218ac2 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -51,7 +51,8 @@ def _check_divmod_op(self, s, op, other, exc=Exception): class BaseArithmeticOpsTests(BaseOpsUtil): - """Various Series and DataFrame arithmetic ops methods. + """ + Various Series and DataFrame arithmetic ops methods. Subclasses supporting various ops should set the class variables to indicate that they support ops of that kind diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index d37638d37e4d6..1942d737780da 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -13,7 +13,8 @@ def dtype(): @pytest.fixture def data(): - """Length-100 array for this type. + """ + Length-100 array for this type. * data[0] and data[1] should both be non missing * data[0] and data[1] should not be equal @@ -67,7 +68,8 @@ def gen(count): @pytest.fixture def data_for_sorting(): - """Length-3 array with a known sort order. + """ + Length-3 array with a known sort order. This should be three items [B, C, A] with A < B < C @@ -77,7 +79,8 @@ def data_for_sorting(): @pytest.fixture def data_missing_for_sorting(): - """Length-3 array with a known sort order. + """ + Length-3 array with a known sort order. This should be three items [B, NA, A] with A < B and NA missing. @@ -87,7 +90,8 @@ def data_missing_for_sorting(): @pytest.fixture def na_cmp(): - """Binary operator for comparing NA values. + """ + Binary operator for comparing NA values. Should return a function of two arguments that returns True if both arguments are (scalar) NA for your type. @@ -105,7 +109,8 @@ def na_value(): @pytest.fixture def data_for_grouping(): - """Data for factorization, grouping, and unique tests. + """ + Data for factorization, grouping, and unique tests. Expected to be like [B, B, NA, NA, A, A, B, C] diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index e6b147e7a4ce7..a229a824d0f9b 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -1,4 +1,5 @@ -"""Test extension array for storing nested data in a pandas container. +""" +Test extension array for storing nested data in a pandas container. The JSONArray stores lists of dictionaries. The storage mechanism is a list, not an ndarray. diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py index ebac36c5f8c78..1214734358c80 100644 --- a/pandas/tests/groupby/conftest.py +++ b/pandas/tests/groupby/conftest.py @@ -107,7 +107,8 @@ def three_group(): @pytest.fixture(params=sorted(reduction_kernels)) def reduction_func(request): - """yields the string names of all groupby reduction functions, one at a time. + """ + yields the string names of all groupby reduction functions, one at a time. 
""" return request.param diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index 9d55609d5db00..9cc031001f81c 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -101,7 +101,8 @@ def setup_method(self, method): setattr(self, kind, d) def generate_indices(self, f, values=False): - """ generate the indices + """ + generate the indices if values is True , use the axis values is False, use the range """ diff --git a/pandas/tests/indexing/multiindex/conftest.py b/pandas/tests/indexing/multiindex/conftest.py index 48e090b242208..0256f5e35e1db 100644 --- a/pandas/tests/indexing/multiindex/conftest.py +++ b/pandas/tests/indexing/multiindex/conftest.py @@ -20,7 +20,8 @@ def multiindex_dataframe_random_data(): @pytest.fixture def multiindex_year_month_day_dataframe_random_data(): - """DataFrame with 3 level MultiIndex (year, month, day) covering + """ + DataFrame with 3 level MultiIndex (year, month, day) covering first 100 business days from 2000-01-01 with random data """ tdf = tm.makeTimeDataFrame(100) diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index 7810778602e12..fe71ca77a7dda 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -27,7 +27,8 @@ def salaries_table(datapath): @pytest.fixture def s3_resource(tips_file, jsonl_file): - """Fixture for mocking S3 interaction. + """ + Fixture for mocking S3 interaction. The primary bucket name is "pandas-test". The following datasets are loaded. diff --git a/pandas/tests/io/pytables/common.py b/pandas/tests/io/pytables/common.py index 7f0b3ab7957e6..aad18890de3ad 100644 --- a/pandas/tests/io/pytables/common.py +++ b/pandas/tests/io/pytables/common.py @@ -74,7 +74,8 @@ def ensure_clean_path(path): def _maybe_remove(store, key): - """For tests using tables, try removing the table to be sure there is + """ + For tests using tables, try removing the table to be sure there is no content from previous tests using the same table name. 
""" try: diff --git a/pandas/tests/resample/conftest.py b/pandas/tests/resample/conftest.py index a4ac15d9f3b07..d5b71a6e4cee1 100644 --- a/pandas/tests/resample/conftest.py +++ b/pandas/tests/resample/conftest.py @@ -98,7 +98,8 @@ def _index_name(): @pytest.fixture def index(_index_factory, _index_start, _index_end, _index_freq, _index_name): - """Fixture for parametrization of date_range, period_range and + """ + Fixture for parametrization of date_range, period_range and timedelta_range indexes """ return _index_factory(_index_start, _index_end, freq=_index_freq, name=_index_name) @@ -106,7 +107,8 @@ def index(_index_factory, _index_start, _index_end, _index_freq, _index_name): @pytest.fixture def _static_values(index): - """Fixture for parametrization of values used in parametrization of + """ + Fixture for parametrization of values used in parametrization of Series and DataFrames with date_range, period_range and timedelta_range indexes """ @@ -115,7 +117,8 @@ def _static_values(index): @pytest.fixture def _series_name(): - """Fixture for parametrization of Series name for Series used with + """ + Fixture for parametrization of Series name for Series used with date_range, period_range and timedelta_range indexes """ return None @@ -123,7 +126,8 @@ def _series_name(): @pytest.fixture def series(index, _series_name, _static_values): - """Fixture for parametrization of Series with date_range, period_range and + """ + Fixture for parametrization of Series with date_range, period_range and timedelta_range indexes """ return Series(_static_values, index=index, name=_series_name) @@ -131,7 +135,8 @@ def series(index, _series_name, _static_values): @pytest.fixture def empty_series(series): - """Fixture for parametrization of empty Series with date_range, + """ + Fixture for parametrization of empty Series with date_range, period_range and timedelta_range indexes """ return series[:0] @@ -139,7 +144,8 @@ def empty_series(series): @pytest.fixture def frame(index, _series_name, _static_values): - """Fixture for parametrization of DataFrame with date_range, period_range + """ + Fixture for parametrization of DataFrame with date_range, period_range and timedelta_range indexes """ # _series_name is intentionally unused @@ -148,7 +154,8 @@ def frame(index, _series_name, _static_values): @pytest.fixture def empty_frame(series): - """Fixture for parametrization of empty DataFrame with date_range, + """ + Fixture for parametrization of empty DataFrame with date_range, period_range and timedelta_range indexes """ index = series.index[:0] @@ -157,7 +164,8 @@ def empty_frame(series): @pytest.fixture(params=[Series, DataFrame]) def series_and_frame(request, series, frame): - """Fixture for parametrization of Series and DataFrame with date_range, + """ + Fixture for parametrization of Series and DataFrame with date_range, period_range and timedelta_range indexes """ if request.param == Series: diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index bfcfd1c5a7101..682575cc9ed48 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -216,7 +216,8 @@ def validate_bool_kwarg(value, arg_name): def validate_axis_style_args(data, args, kwargs, arg_name, method_name): - """Argument handler for mixed index, columns / axis functions + """ + Argument handler for mixed index, columns / axis functions In an attempt to handle both `.method(index, columns)`, and `.method(arg, axis=.)`, we have to do some bad things to argument @@ -310,7 +311,8 @@ def validate_axis_style_args(data, 
args, kwargs, arg_name, method_name): def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): - """Validate the keyword arguments to 'fillna'. + """ + Validate the keyword arguments to 'fillna'. This checks that exactly one of 'value' and 'method' is specified. If 'method' is specified, this validates that it's a valid method. From 45d093d4f7a81930f1aa523f9c0c670174d9e243 Mon Sep 17 00:00:00 2001 From: 3vts <3vts@users.noreply.github.com> Date: Sat, 15 Feb 2020 13:04:19 -0600 Subject: [PATCH 045/388] Seventh batch of changes (#31986) --- pandas/tests/scalar/timestamp/test_constructors.py | 6 ++---- pandas/tests/scalar/timestamp/test_rendering.py | 2 +- pandas/tests/scalar/timestamp/test_unary_ops.py | 6 +++--- pandas/tests/series/methods/test_nlargest.py | 2 +- pandas/tests/series/test_analytics.py | 8 ++++---- pandas/tests/series/test_api.py | 6 ++---- pandas/tests/series/test_dtypes.py | 8 ++++---- pandas/tests/series/test_ufunc.py | 2 +- 8 files changed, 18 insertions(+), 22 deletions(-) diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 737a85faa4c9b..b4a7173da84d0 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -314,7 +314,7 @@ def test_constructor_nanosecond(self, result): def test_constructor_invalid_Z0_isostring(self, z): # GH 8910 with pytest.raises(ValueError): - Timestamp("2014-11-02 01:00{}".format(z)) + Timestamp(f"2014-11-02 01:00{z}") @pytest.mark.parametrize( "arg", @@ -455,9 +455,7 @@ def test_disallow_setting_tz(self, tz): @pytest.mark.parametrize("offset", ["+0300", "+0200"]) def test_construct_timestamp_near_dst(self, offset): # GH 20854 - expected = Timestamp( - "2016-10-30 03:00:00{}".format(offset), tz="Europe/Helsinki" - ) + expected = Timestamp(f"2016-10-30 03:00:00{offset}", tz="Europe/Helsinki") result = Timestamp(expected).tz_convert("Europe/Helsinki") assert result == expected diff --git a/pandas/tests/scalar/timestamp/test_rendering.py b/pandas/tests/scalar/timestamp/test_rendering.py index cab6946bb8d02..a27d233d5ab88 100644 --- a/pandas/tests/scalar/timestamp/test_rendering.py +++ b/pandas/tests/scalar/timestamp/test_rendering.py @@ -17,7 +17,7 @@ class TestTimestampRendering: ) def test_repr(self, date, freq, tz): # avoid to match with timezone name - freq_repr = "'{0}'".format(freq) + freq_repr = f"'{freq}'" if tz.startswith("dateutil"): tz_repr = tz.replace("dateutil", "") else: diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index f968144286bd4..78e795e71cd07 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -232,17 +232,17 @@ def test_round_int64(self, timestamp, freq): # test floor result = dt.floor(freq) - assert result.value % unit == 0, "floor not a {} multiple".format(freq) + assert result.value % unit == 0, f"floor not a {freq} multiple" assert 0 <= dt.value - result.value < unit, "floor error" # test ceil result = dt.ceil(freq) - assert result.value % unit == 0, "ceil not a {} multiple".format(freq) + assert result.value % unit == 0, f"ceil not a {freq} multiple" assert 0 <= result.value - dt.value < unit, "ceil error" # test round result = dt.round(freq) - assert result.value % unit == 0, "round not a {} multiple".format(freq) + assert result.value % unit == 0, f"round not a {freq} multiple" assert abs(result.value - dt.value) <= unit // 2, "round 
error" if unit % 2 == 0 and abs(result.value - dt.value) == unit // 2: # round half to even diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py index a029965c7394f..b1aa09f387a13 100644 --- a/pandas/tests/series/methods/test_nlargest.py +++ b/pandas/tests/series/methods/test_nlargest.py @@ -98,7 +98,7 @@ class TestSeriesNLargestNSmallest: ) def test_nlargest_error(self, r): dt = r.dtype - msg = "Cannot use method 'n(larg|small)est' with dtype {dt}".format(dt=dt) + msg = f"Cannot use method 'n(larg|small)est' with dtype {dt}" args = 2, len(r), 0, -1 methods = r.nlargest, r.nsmallest for method, arg in product(methods, args): diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index e6e91b5d4f5f4..6f45b72154805 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -169,10 +169,10 @@ def test_validate_any_all_out_keepdims_raises(self, kwargs, func): name = func.__name__ msg = ( - r"the '{arg}' parameter is not " - r"supported in the pandas " - r"implementation of {fname}\(\)" - ).format(arg=param, fname=name) + f"the '{param}' parameter is not " + "supported in the pandas " + fr"implementation of {name}\(\)" + ) with pytest.raises(ValueError, match=msg): func(s, **kwargs) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index f96d6ddfc357e..33706c00c53f4 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -136,9 +136,7 @@ def test_constructor_subclass_dict(self, dict_subclass): def test_constructor_ordereddict(self): # GH3283 - data = OrderedDict( - ("col{i}".format(i=i), np.random.random()) for i in range(12) - ) + data = OrderedDict((f"col{i}", np.random.random()) for i in range(12)) series = Series(data) expected = Series(list(data.values()), list(data.keys())) @@ -258,7 +256,7 @@ def get_dir(s): tm.makeIntIndex(10), tm.makeFloatIndex(10), Index([True, False]), - Index(["a{}".format(i) for i in range(101)]), + Index([f"a{i}" for i in range(101)]), pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")), pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")), ], diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 1fc582156a884..80a024eda7848 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -261,7 +261,7 @@ def test_astype_categorical_to_other(self): value = np.random.RandomState(0).randint(0, 10000, 100) df = DataFrame({"value": value}) - labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] + labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)] cat_labels = Categorical(labels, labels) df = df.sort_values(by=["value"], ascending=True) @@ -384,9 +384,9 @@ def test_astype_generic_timestamp_no_frequency(self, dtype): s = Series(data) msg = ( - r"The '{dtype}' dtype has no unit\. " - r"Please pass in '{dtype}\[ns\]' instead." - ).format(dtype=dtype.__name__) + fr"The '{dtype.__name__}' dtype has no unit\. " + fr"Please pass in '{dtype.__name__}\[ns\]' instead." 
+ ) with pytest.raises(ValueError, match=msg): s.astype(dtype) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index ece7f1f21ab23..536f15ea75d69 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -287,7 +287,7 @@ def __eq__(self, other) -> bool: return type(other) is Thing and self.value == other.value def __repr__(self) -> str: - return "Thing({})".format(self.value) + return f"Thing({self.value})" s = pd.Series([Thing(1), Thing(2)]) result = np.add(s, Thing(1)) From 206a5476951d2f2f4f47b33fcfef8a064a043cb0 Mon Sep 17 00:00:00 2001 From: pan Jacek Date: Sat, 15 Feb 2020 20:05:02 +0100 Subject: [PATCH 046/388] CLN 2574 Replace old string formating (#32007) --- pandas/tests/frame/test_to_csv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index aeff92971b42a..86c9a98377f3f 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -687,7 +687,7 @@ def _make_frame(names=None): df.to_csv(path) for i in [6, 7]: - msg = "len of {i}, but only 5 lines in file".format(i=i) + msg = f"len of {i}, but only 5 lines in file" with pytest.raises(ParserError, match=msg): read_csv(path, header=list(range(i)), index_col=0) @@ -744,7 +744,7 @@ def test_to_csv_withcommas(self): def test_to_csv_mixed(self): def create_cols(name): - return ["{name}{i:03d}".format(name=name, i=i) for i in range(5)] + return [f"{name}{i:03d}" for i in range(5)] df_float = DataFrame( np.random.randn(100, 5), dtype="float64", columns=create_cols("float") From 92a64c75904f251e1e05d400f4355eca18a25620 Mon Sep 17 00:00:00 2001 From: Patrick Cando <32943309+pcandoalmeida@users.noreply.github.com> Date: Sat, 15 Feb 2020 19:05:31 +0000 Subject: [PATCH 047/388] CLN: GH29547 replace old string formatting (#32029) --- pandas/tests/arrays/categorical/test_analytics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index 90fcf12093909..0ff7d3e59abb3 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -15,10 +15,10 @@ class TestCategoricalAnalytics: def test_min_max_not_ordered_raises(self, aggregation): # unordered cats have no min/max cat = Categorical(["a", "b", "c", "d"], ordered=False) - msg = "Categorical is not ordered for operation {}" + msg = f"Categorical is not ordered for operation {aggregation}" agg_func = getattr(cat, aggregation) - with pytest.raises(TypeError, match=msg.format(aggregation)): + with pytest.raises(TypeError, match=msg): agg_func() def test_min_max_ordered(self): From 3689e6c2d3929b5f30ca4d3e70e026272bd33aa0 Mon Sep 17 00:00:00 2001 From: DavaIlhamHaeruzaman <60961251+DavaIlhamHaeruzaman@users.noreply.github.com> Date: Sun, 16 Feb 2020 02:06:25 +0700 Subject: [PATCH 048/388] Add period to parameter description (#32005) From 2e8274b1de4c1e16d13f61c9f141605999253a96 Mon Sep 17 00:00:00 2001 From: za Date: Sun, 16 Feb 2020 02:07:13 +0700 Subject: [PATCH 049/388] DOC SS06 Make the summery in one line on offsets.py (#32021) --- pandas/tseries/offsets.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index df1e750b32138..959dd19a50d90 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1017,8 +1017,7 @@ def __init__( class 
CustomBusinessDay(_CustomMixin, BusinessDay): """ - DateOffset subclass representing possibly n custom business days, - excluding holidays. + DateOffset subclass representing custom business days excluding holidays. Parameters ---------- From 10228cb24dbf6c6a21006ebd5d9ce4a087d03997 Mon Sep 17 00:00:00 2001 From: andhikayusup Date: Sun, 16 Feb 2020 02:08:12 +0700 Subject: [PATCH 050/388] DOC: Improve docstring of Index.delete (#32015) --- pandas/core/indexes/base.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3d549405592d6..14ee21ea5614c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5135,9 +5135,30 @@ def delete(self, loc): """ Make new Index with passed location(-s) deleted. + Parameters + ---------- + loc : int or list of int + Location of item(-s) which will be deleted. + Use a list of locations to delete more than one value at the same time. + Returns ------- - new_index : Index + Index + New Index with passed location(-s) deleted. + + See Also + -------- + numpy.delete : Delete any rows and column from NumPy array (ndarray). + + Examples + -------- + >>> idx = pd.Index(['a', 'b', 'c']) + >>> idx.delete(1) + Index(['a', 'c'], dtype='object') + + >>> idx = pd.Index(['a', 'b', 'c']) + >>> idx.delete([0, 2]) + Index(['b'], dtype='object') """ return self._shallow_copy(np.delete(self._data, loc)) From f163f255ed16f4a32ff638f2f88989bf2c4cb754 Mon Sep 17 00:00:00 2001 From: Adam W Bagaskarta Date: Sun, 16 Feb 2020 02:10:23 +0700 Subject: [PATCH 051/388] DOC: Update pandas.Series.between_time docstring params (#32014) --- pandas/core/generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ad177518ba4b3..579daae2b15c6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7551,16 +7551,22 @@ def between_time( Parameters ---------- start_time : datetime.time or str + Initial time as a time filter limit. end_time : datetime.time or str + End time as a time filter limit. include_start : bool, default True + Whether the start time needs to be included in the result. include_end : bool, default True + Whether the end time needs to be included in the result. axis : {0 or 'index', 1 or 'columns'}, default 0 + Determine range time on index or columns value. .. versionadded:: 0.24.0 Returns ------- Series or DataFrame + Data from the original object filtered to the specified dates range. Raises ------ From 3118576616fa59e6a984c8119208607adba640de Mon Sep 17 00:00:00 2001 From: DavaIlhamHaeruzaman <60961251+DavaIlhamHaeruzaman@users.noreply.github.com> Date: Sun, 16 Feb 2020 04:50:42 +0700 Subject: [PATCH 052/388] DOC: Add missing period to parameter description (#32022) --- pandas/io/formats/style.py | 2 +- pandas/io/sql.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index d0c64d54f30d6..018441dacd9a8 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -452,7 +452,7 @@ def format(self, formatter, subset=None, na_rep: Optional[str] = None) -> "Style ``formatter`` is applied to. na_rep : str, optional Representation for missing values. - If ``na_rep`` is None, no special formatting is applied + If ``na_rep`` is None, no special formatting is applied. .. 
From 3118576616fa59e6a984c8119208607adba640de Mon Sep 17 00:00:00 2001
From: DavaIlhamHaeruzaman <60961251+DavaIlhamHaeruzaman@users.noreply.github.com>
Date: Sun, 16 Feb 2020 04:50:42 +0700
Subject: [PATCH 052/388] DOC: Add missing period to parameter description (#32022)

---
 pandas/io/formats/style.py | 2 +-
 pandas/io/sql.py           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
index d0c64d54f30d6..018441dacd9a8 100644
--- a/pandas/io/formats/style.py
+++ b/pandas/io/formats/style.py
@@ -452,7 +452,7 @@ def format(self, formatter, subset=None, na_rep: Optional[str] = None) -> "Style
             ``formatter`` is applied to.
         na_rep : str, optional
             Representation for missing values.
-            If ``na_rep`` is None, no special formatting is applied
+            If ``na_rep`` is None, no special formatting is applied.

             .. versionadded:: 1.0.0

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 0eb7deb5574ec..e97872d880dee 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -361,7 +361,7 @@ def read_sql(
         Using SQLAlchemy makes it possible to use any DB supported by that
         library. If a DBAPI2 object, only sqlite3 is supported. The user is responsible
         for engine disposal and connection closure for the SQLAlchemy connectable. See
-        `here `_
+        `here `_.
     index_col : str or list of str, optional, default: None
         Column(s) to set as index(MultiIndex).
     coerce_float : bool, default True
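The style.py hunk above touches the documentation of na_rep in Styler.format. A short sketch of what that parameter does (rendering needs jinja2 installed; the frame contents are invented):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"a": [1.0, np.nan]})
    # With na_rep=None (the default) no special formatting is applied to
    # missing values; a string is substituted for them in the rendered HTML.
    html = df.style.format("{:.1f}", na_rep="-").render()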
at position {index}" with pytest.raises(ValueError, match=msg): to_numeric(arr, **kwargs) diff --git a/pandas/tests/tslibs/test_parse_iso8601.py b/pandas/tests/tslibs/test_parse_iso8601.py index a58f227c20c7f..1c01e826d9794 100644 --- a/pandas/tests/tslibs/test_parse_iso8601.py +++ b/pandas/tests/tslibs/test_parse_iso8601.py @@ -51,7 +51,7 @@ def test_parsers_iso8601(date_str, exp): ], ) def test_parsers_iso8601_invalid(date_str): - msg = 'Error parsing datetime string "{s}"'.format(s=date_str) + msg = f'Error parsing datetime string "{date_str}"' with pytest.raises(ValueError, match=msg): tslib._test_parse_iso8601(date_str) diff --git a/pandas/tests/window/moments/test_moments_rolling.py b/pandas/tests/window/moments/test_moments_rolling.py index fd18d37ab13b6..f3a14971ef2e7 100644 --- a/pandas/tests/window/moments/test_moments_rolling.py +++ b/pandas/tests/window/moments/test_moments_rolling.py @@ -860,7 +860,7 @@ def get_result(obj, window, min_periods=None, center=False): tm.assert_series_equal(result, expected) # shifter index - s = ["x{x:d}".format(x=x) for x in range(12)] + s = [f"x{x:d}" for x in range(12)] if has_min_periods: minp = 10 @@ -1437,13 +1437,9 @@ def test_rolling_median_memory_error(self): def test_rolling_min_max_numeric_types(self): # GH12373 - types_test = [np.dtype("f{}".format(width)) for width in [4, 8]] + types_test = [np.dtype(f"f{width}") for width in [4, 8]] types_test.extend( - [ - np.dtype("{}{}".format(sign, width)) - for width in [1, 2, 4, 8] - for sign in "ui" - ] + [np.dtype(f"{sign}{width}") for width in [1, 2, 4, 8] for sign in "ui"] ) for data_type in types_test: # Just testing that these don't throw exceptions and that From 4f0568e7c2c2901cb4824b8334d326cb22b8e20b Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 16 Feb 2020 17:43:56 +0000 Subject: [PATCH 054/388] CI: change np-dev xfails to not strict (#32031) --- pandas/tests/frame/test_cumulative.py | 8 ++++++-- pandas/tests/groupby/test_function.py | 4 +++- pandas/tests/scalar/timedelta/test_arithmetic.py | 1 + pandas/tests/series/test_cumulative.py | 8 ++++++-- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py index 2466547e2948b..486cbfb2761e0 100644 --- a/pandas/tests/frame/test_cumulative.py +++ b/pandas/tests/frame/test_cumulative.py @@ -75,7 +75,9 @@ def test_cumprod(self, datetime_frame): df.cumprod(1) @pytest.mark.xfail( - _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" + _is_numpy_dev, + reason="https://github.com/pandas-dev/pandas/issues/31992", + strict=False, ) def test_cummin(self, datetime_frame): datetime_frame.loc[5:10, 0] = np.nan @@ -101,7 +103,9 @@ def test_cummin(self, datetime_frame): assert np.shape(cummin_xs) == np.shape(datetime_frame) @pytest.mark.xfail( - _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" + _is_numpy_dev, + reason="https://github.com/pandas-dev/pandas/issues/31992", + strict=False, ) def test_cummax(self, datetime_frame): datetime_frame.loc[5:10, 0] = np.nan diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 8830b84a52421..176c0272ca527 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -687,7 +687,9 @@ def test_numpy_compat(func): @pytest.mark.xfail( - _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" + _is_numpy_dev, + 
reason="https://github.com/pandas-dev/pandas/issues/31992", + strict=False, ) def test_cummin_cummax(): # GH 15048 diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index f0ad5fa70471b..230a14aeec60a 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -386,6 +386,7 @@ def test_td_div_numeric_scalar(self): marks=pytest.mark.xfail( _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992", + strict=False, ), ), float("nan"), diff --git a/pandas/tests/series/test_cumulative.py b/pandas/tests/series/test_cumulative.py index b0065992b850a..0cb1c038478f5 100644 --- a/pandas/tests/series/test_cumulative.py +++ b/pandas/tests/series/test_cumulative.py @@ -39,7 +39,9 @@ def test_cumprod(self, datetime_series): _check_accum_op("cumprod", datetime_series) @pytest.mark.xfail( - _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" + _is_numpy_dev, + reason="https://github.com/pandas-dev/pandas/issues/31992", + strict=False, ) def test_cummin(self, datetime_series): tm.assert_numpy_array_equal( @@ -54,7 +56,9 @@ def test_cummin(self, datetime_series): tm.assert_series_equal(result, expected) @pytest.mark.xfail( - _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" + _is_numpy_dev, + reason="https://github.com/pandas-dev/pandas/issues/31992", + strict=False, ) def test_cummax(self, datetime_series): tm.assert_numpy_array_equal( From 05ab8babd5cf48d52e415d822eb2fce2aaabacef Mon Sep 17 00:00:00 2001 From: William Ayd Date: Sun, 16 Feb 2020 10:41:23 -0800 Subject: [PATCH 055/388] Added clang inline helper (#30990) --- pandas/_libs/src/inline_helper.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/src/inline_helper.h b/pandas/_libs/src/inline_helper.h index e203a05d2eb56..40fd45762ffe4 100644 --- a/pandas/_libs/src/inline_helper.h +++ b/pandas/_libs/src/inline_helper.h @@ -11,7 +11,9 @@ The full license is in the LICENSE file, distributed with this software. 
From 06eb8db97bce1a0730647410eceed2300afdd23a Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Sun, 16 Feb 2020 16:23:54 -0800
Subject: [PATCH 056/388] CLN: Some groupby internals (#31915)

* CLN: Some groupby internals

* Additional annotation
---
 pandas/core/groupby/ops.py         | 12 ++++++------
 pandas/tests/groupby/test_apply.py |  3 ++-
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 63087672d1365..7259268ac3f2b 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -169,7 +169,7 @@ def apply(self, f, data: FrameOrSeries, axis: int = 0):
             and not sdata.index._has_complex_internals
         ):
             try:
-                result_values, mutated = splitter.fast_apply(f, group_keys)
+                result_values, mutated = splitter.fast_apply(f, sdata, group_keys)

             except libreduction.InvalidApply as err:
                 # This Exception is raised if `f` triggers an exception
@@ -925,11 +925,9 @@ def _chop(self, sdata: Series, slice_obj: slice) -> Series:


 class FrameSplitter(DataSplitter):
-    def fast_apply(self, f, names):
+    def fast_apply(self, f, sdata: FrameOrSeries, names):
         # must return keys::list, values::list, mutated::bool
         starts, ends = lib.generate_slices(self.slabels, self.ngroups)
-
-        sdata = self._get_sorted_data()
         return libreduction.apply_frame_axis0(sdata, f, names, starts, ends)

     def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
@@ -939,11 +937,13 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
         return sdata.iloc[:, slice_obj]


-def get_splitter(data: FrameOrSeries, *args, **kwargs) -> DataSplitter:
+def get_splitter(
+    data: FrameOrSeries, labels: np.ndarray, ngroups: int, axis: int = 0
+) -> DataSplitter:
     if isinstance(data, Series):
         klass: Type[DataSplitter] = SeriesSplitter
     else:
         # i.e. DataFrame
         klass = FrameSplitter

-    return klass(data, *args, **kwargs)
+    return klass(data, labels, ngroups, axis)
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 41ec70468aaeb..18ad5d90b3f60 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -108,8 +108,9 @@ def f(g):

     splitter = grouper._get_splitter(g._selected_obj, axis=g.axis)
     group_keys = grouper._get_group_keys()
+    sdata = splitter._get_sorted_data()

-    values, mutated = splitter.fast_apply(f, group_keys)
+    values, mutated = splitter.fast_apply(f, sdata, group_keys)

     assert not mutated
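After patch 056 the FrameSplitter no longer fetches the sorted data itself; the caller obtains it once and threads it through fast_apply, exactly as the updated test does. A sketch of the new internal calling convention (these are private pandas APIs, shown only to mirror the signature change; the frame is invented):

    import pandas as pd

    df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})
    g = df.groupby("key")
    grouper = g.grouper

    splitter = grouper._get_splitter(g._selected_obj, axis=g.axis)
    group_keys = grouper._get_group_keys()
    sdata = splitter._get_sorted_data()  # caller now supplies the sorted frame

    def f(group):
        return 1  # trivial function, as in the regression test

    values, mutated = splitter.fast_apply(f, sdata, group_keys)
    assert not mutated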
From 267d2d8635920581f362dbf5fdc1f7b1fff97de39326421e7 Mon Sep 17 00:00:00 2001
From: Stijn Van Hoey
Date: Mon, 17 Feb 2020 12:49:42 +0100
Subject: [PATCH 057/388] DOC: Update of the 'getting started' pages in the sphinx section of the documentation (#31156)

---
 doc/data/air_quality_long.csv                                          | 5273 ++++++++++++++
 doc/data/air_quality_no2.csv                                           | 1036 +++
 doc/data/air_quality_no2_long.csv                                      | 2069 ++++++
 doc/data/air_quality_parameters.csv                                    |    8 +
 doc/data/air_quality_pm25_long.csv                                     | 1111 +++
 doc/data/air_quality_stations.csv                                      |   67 +
 doc/data/titanic.csv                                                   |  892 +++
 doc/source/_static/css/getting_started.css                             |  251 +
 doc/source/_static/logo_r.svg                                          |   14 +
 doc/source/_static/logo_sas.svg                                        |    9 +
 doc/source/_static/logo_sql.svg                                        |   73 +
 doc/source/_static/logo_stata.svg                                      |   17 +
 doc/source/_static/schemas/01_table_dataframe.svg                      |  262 +
 doc/source/_static/schemas/01_table_series.svg                         |  127 +
 doc/source/_static/schemas/01_table_spreadsheet.png                    |  Bin 0 -> 46286 bytes
 doc/source/_static/schemas/02_io_readwrite.svg                         | 1401 ++++
 doc/source/_static/schemas/03_subset_columns.svg                       |  327 +
 doc/source/_static/schemas/03_subset_columns_rows.svg                  |  272 +
 doc/source/_static/schemas/03_subset_rows.svg                          |  316 +
 doc/source/_static/schemas/04_plot_overview.svg                        | 6443 +++++++++++++++++
 doc/source/_static/schemas/05_newcolumn_1.svg                          |  347 +
 doc/source/_static/schemas/05_newcolumn_2.svg                          |  347 +
 doc/source/_static/schemas/05_newcolumn_3.svg                          |  352 +
 doc/source/_static/schemas/06_aggregate.svg                            |  211 +
 doc/source/_static/schemas/06_groupby.svg                              |  307 +
 doc/source/_static/schemas/06_groupby_agg_detail.svg                   |  619 ++
 doc/source/_static/schemas/06_groupby_select_detail.svg                |  697 ++
 doc/source/_static/schemas/06_reduction.svg                            |  222 +
 doc/source/_static/schemas/06_valuecounts.svg                          |  269 +
 doc/source/_static/schemas/07_melt.svg                                 |  315 +
 doc/source/_static/schemas/07_pivot.svg                                |  338 +
 doc/source/_static/schemas/07_pivot_table.svg                          |  455 ++
 doc/source/_static/schemas/08_concat_column.svg                        |  465 ++
 doc/source/_static/schemas/08_concat_row.svg                           |  392 +
 doc/source/_static/schemas/08_merge_left.svg                           |  608 ++
 doc/source/conf.py                                                     |    1 +
 doc/source/getting_started/dsintro.rst                                 |    1 +
 doc/source/getting_started/index.rst                                   |  651 ++
 doc/source/getting_started/intro_tutorials/01_table_oriented.rst       |  218 +
 doc/source/getting_started/intro_tutorials/02_read_write.rst           |  232 +
 doc/source/getting_started/intro_tutorials/03_subset_data.rst          |  405 ++
 doc/source/getting_started/intro_tutorials/04_plotting.rst             |  252 +
 doc/source/getting_started/intro_tutorials/05_add_columns.rst          |  186 +
 doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst |  310 +
 doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst |  404 ++
 doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst   |  326 +
 doc/source/getting_started/intro_tutorials/09_timeseries.rst           |  389 +
 doc/source/getting_started/intro_tutorials/10_text_data.rst            |  278 +
 doc/source/getting_started/intro_tutorials/index.rst                   |   22 +
 doc/source/index.rst.template                                          |    2 +
 doc/source/user_guide/reshaping.rst                                    |    4 +
 doc/source/user_guide/text.rst                                         |    3 +-
 52 files changed, 29594 insertions(+), 2 deletions(-)
 create mode 100644 doc/data/air_quality_long.csv
 create mode 100644 doc/data/air_quality_no2.csv
 create mode 100644 doc/data/air_quality_no2_long.csv
 create mode 100644 doc/data/air_quality_parameters.csv
 create mode 100644 doc/data/air_quality_pm25_long.csv
 create mode 100644 doc/data/air_quality_stations.csv
 create mode 100644 doc/data/titanic.csv
 create mode 100644 doc/source/_static/css/getting_started.css
 create mode 100644 doc/source/_static/logo_r.svg
 create mode 100644 doc/source/_static/logo_sas.svg
 create mode 100644 doc/source/_static/logo_sql.svg
 create mode 100644 doc/source/_static/logo_stata.svg
 create mode 100644 doc/source/_static/schemas/01_table_dataframe.svg
 create mode 100644 doc/source/_static/schemas/01_table_series.svg
 create mode 100644 doc/source/_static/schemas/01_table_spreadsheet.png
 create mode 100644 doc/source/_static/schemas/02_io_readwrite.svg
 create mode 100644 doc/source/_static/schemas/03_subset_columns.svg
 create mode 100644 doc/source/_static/schemas/03_subset_columns_rows.svg
 create mode 100644 doc/source/_static/schemas/03_subset_rows.svg
 create mode 100644 doc/source/_static/schemas/04_plot_overview.svg
 create mode 100644 doc/source/_static/schemas/05_newcolumn_1.svg
 create mode 100644 doc/source/_static/schemas/05_newcolumn_2.svg
 create mode 100644 doc/source/_static/schemas/05_newcolumn_3.svg
 create mode 100644 doc/source/_static/schemas/06_aggregate.svg
 create mode 100644 doc/source/_static/schemas/06_groupby.svg
 create mode 100644 doc/source/_static/schemas/06_groupby_agg_detail.svg
 create mode 100644 doc/source/_static/schemas/06_groupby_select_detail.svg
 create mode 100644 doc/source/_static/schemas/06_reduction.svg
 create mode 100644 doc/source/_static/schemas/06_valuecounts.svg
 create mode 100644 doc/source/_static/schemas/07_melt.svg
 create mode 100644 doc/source/_static/schemas/07_pivot.svg
 create mode 100644 doc/source/_static/schemas/07_pivot_table.svg
 create mode 100644 doc/source/_static/schemas/08_concat_column.svg
 create mode 100644 doc/source/_static/schemas/08_concat_row.svg
 create mode 100644 doc/source/_static/schemas/08_merge_left.svg
 create mode 100644 doc/source/getting_started/intro_tutorials/01_table_oriented.rst
 create mode 100644 doc/source/getting_started/intro_tutorials/02_read_write.rst
 create mode 100644 doc/source/getting_started/intro_tutorials/03_subset_data.rst
 create mode 100644 doc/source/getting_started/intro_tutorials/04_plotting.rst
 create mode 100644 doc/source/getting_started/intro_tutorials/05_add_columns.rst
 create mode 100644 doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
 create mode 100644 doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
 create mode 100644 doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
 create mode 100644 doc/source/getting_started/intro_tutorials/09_timeseries.rst
 create mode 100644 doc/source/getting_started/intro_tutorials/10_text_data.rst
 create mode 100644 doc/source/getting_started/intro_tutorials/index.rst

diff --git a/doc/data/air_quality_long.csv b/doc/data/air_quality_long.csv
new file mode 100644
index 0000000000000..6225d65d8e276
--- /dev/null
+++ b/doc/data/air_quality_long.csv
@@ -0,0 +1,5273 @@
+city,country,date.utc,location,parameter,value,unit
+Antwerpen,BE,2019-06-18 06:00:00+00:00,BETR801,pm25,18.0,µg/m³
+Antwerpen,BE,2019-06-17 08:00:00+00:00,BETR801,pm25,6.5,µg/m³
+Antwerpen,BE,2019-06-17 07:00:00+00:00,BETR801,pm25,18.5,µg/m³
+Antwerpen,BE,2019-06-17 06:00:00+00:00,BETR801,pm25,16.0,µg/m³
+Antwerpen,BE,2019-06-17 05:00:00+00:00,BETR801,pm25,7.5,µg/m³
+Antwerpen,BE,2019-06-17 04:00:00+00:00,BETR801,pm25,7.5,µg/m³
+Antwerpen,BE,2019-06-17 03:00:00+00:00,BETR801,pm25,7.0,µg/m³ +Antwerpen,BE,2019-06-17 02:00:00+00:00,BETR801,pm25,7.0,µg/m³ +Antwerpen,BE,2019-06-17 01:00:00+00:00,BETR801,pm25,8.0,µg/m³ +Antwerpen,BE,2019-06-16 01:00:00+00:00,BETR801,pm25,15.0,µg/m³ +Antwerpen,BE,2019-06-15 01:00:00+00:00,BETR801,pm25,11.0,µg/m³ +Antwerpen,BE,2019-06-14 09:00:00+00:00,BETR801,pm25,12.0,µg/m³ +Antwerpen,BE,2019-06-13 01:00:00+00:00,BETR801,pm25,3.0,µg/m³ +Antwerpen,BE,2019-06-12 01:00:00+00:00,BETR801,pm25,16.0,µg/m³ +Antwerpen,BE,2019-06-11 01:00:00+00:00,BETR801,pm25,3.5,µg/m³ +Antwerpen,BE,2019-06-10 01:00:00+00:00,BETR801,pm25,8.5,µg/m³ +Antwerpen,BE,2019-06-09 01:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-06-08 01:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-06-06 01:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-06-05 01:00:00+00:00,BETR801,pm25,11.0,µg/m³ +Antwerpen,BE,2019-06-04 01:00:00+00:00,BETR801,pm25,10.5,µg/m³ +Antwerpen,BE,2019-06-03 01:00:00+00:00,BETR801,pm25,12.5,µg/m³ +Antwerpen,BE,2019-06-02 01:00:00+00:00,BETR801,pm25,19.0,µg/m³ +Antwerpen,BE,2019-06-01 01:00:00+00:00,BETR801,pm25,9.0,µg/m³ +Antwerpen,BE,2019-05-31 01:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-05-30 01:00:00+00:00,BETR801,pm25,5.0,µg/m³ +Antwerpen,BE,2019-05-29 01:00:00+00:00,BETR801,pm25,5.5,µg/m³ +Antwerpen,BE,2019-05-28 01:00:00+00:00,BETR801,pm25,7.0,µg/m³ +Antwerpen,BE,2019-05-27 01:00:00+00:00,BETR801,pm25,7.5,µg/m³ +Antwerpen,BE,2019-05-26 01:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-05-25 01:00:00+00:00,BETR801,pm25,10.0,µg/m³ +Antwerpen,BE,2019-05-24 01:00:00+00:00,BETR801,pm25,13.0,µg/m³ +Antwerpen,BE,2019-05-23 01:00:00+00:00,BETR801,pm25,7.5,µg/m³ +Antwerpen,BE,2019-05-22 01:00:00+00:00,BETR801,pm25,15.5,µg/m³ +Antwerpen,BE,2019-05-21 01:00:00+00:00,BETR801,pm25,20.5,µg/m³ +Antwerpen,BE,2019-05-20 17:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-05-20 16:00:00+00:00,BETR801,pm25,17.0,µg/m³ +Antwerpen,BE,2019-05-20 15:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-05-20 14:00:00+00:00,BETR801,pm25,14.5,µg/m³ +Antwerpen,BE,2019-05-20 13:00:00+00:00,BETR801,pm25,17.0,µg/m³ +Antwerpen,BE,2019-05-20 12:00:00+00:00,BETR801,pm25,17.5,µg/m³ +Antwerpen,BE,2019-05-20 11:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-20 10:00:00+00:00,BETR801,pm25,10.5,µg/m³ +Antwerpen,BE,2019-05-20 09:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-20 08:00:00+00:00,BETR801,pm25,19.5,µg/m³ +Antwerpen,BE,2019-05-20 07:00:00+00:00,BETR801,pm25,23.5,µg/m³ +Antwerpen,BE,2019-05-20 06:00:00+00:00,BETR801,pm25,22.0,µg/m³ +Antwerpen,BE,2019-05-20 05:00:00+00:00,BETR801,pm25,25.0,µg/m³ +Antwerpen,BE,2019-05-20 04:00:00+00:00,BETR801,pm25,24.5,µg/m³ +Antwerpen,BE,2019-05-20 03:00:00+00:00,BETR801,pm25,15.0,µg/m³ +Antwerpen,BE,2019-05-20 02:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-05-20 01:00:00+00:00,BETR801,pm25,28.0,µg/m³ +Antwerpen,BE,2019-05-19 21:00:00+00:00,BETR801,pm25,35.5,µg/m³ +Antwerpen,BE,2019-05-19 20:00:00+00:00,BETR801,pm25,40.0,µg/m³ +Antwerpen,BE,2019-05-19 19:00:00+00:00,BETR801,pm25,43.5,µg/m³ +Antwerpen,BE,2019-05-19 18:00:00+00:00,BETR801,pm25,35.0,µg/m³ +Antwerpen,BE,2019-05-19 17:00:00+00:00,BETR801,pm25,34.0,µg/m³ +Antwerpen,BE,2019-05-19 16:00:00+00:00,BETR801,pm25,36.5,µg/m³ +Antwerpen,BE,2019-05-19 15:00:00+00:00,BETR801,pm25,44.0,µg/m³ +Antwerpen,BE,2019-05-19 14:00:00+00:00,BETR801,pm25,43.5,µg/m³ +Antwerpen,BE,2019-05-19 13:00:00+00:00,BETR801,pm25,46.0,µg/m³ +Antwerpen,BE,2019-05-19 
12:00:00+00:00,BETR801,pm25,43.0,µg/m³ +Antwerpen,BE,2019-05-19 11:00:00+00:00,BETR801,pm25,41.0,µg/m³ +Antwerpen,BE,2019-05-19 10:00:00+00:00,BETR801,pm25,41.5,µg/m³ +Antwerpen,BE,2019-05-19 09:00:00+00:00,BETR801,pm25,42.5,µg/m³ +Antwerpen,BE,2019-05-19 08:00:00+00:00,BETR801,pm25,51.5,µg/m³ +Antwerpen,BE,2019-05-19 07:00:00+00:00,BETR801,pm25,56.0,µg/m³ +Antwerpen,BE,2019-05-19 06:00:00+00:00,BETR801,pm25,58.5,µg/m³ +Antwerpen,BE,2019-05-19 05:00:00+00:00,BETR801,pm25,60.0,µg/m³ +Antwerpen,BE,2019-05-19 04:00:00+00:00,BETR801,pm25,56.5,µg/m³ +Antwerpen,BE,2019-05-19 03:00:00+00:00,BETR801,pm25,52.5,µg/m³ +Antwerpen,BE,2019-05-19 02:00:00+00:00,BETR801,pm25,51.5,µg/m³ +Antwerpen,BE,2019-05-19 01:00:00+00:00,BETR801,pm25,52.0,µg/m³ +Antwerpen,BE,2019-05-19 00:00:00+00:00,BETR801,pm25,49.5,µg/m³ +Antwerpen,BE,2019-05-18 23:00:00+00:00,BETR801,pm25,45.5,µg/m³ +Antwerpen,BE,2019-05-18 22:00:00+00:00,BETR801,pm25,42.0,µg/m³ +Antwerpen,BE,2019-05-18 21:00:00+00:00,BETR801,pm25,40.5,µg/m³ +Antwerpen,BE,2019-05-18 20:00:00+00:00,BETR801,pm25,41.0,µg/m³ +Antwerpen,BE,2019-05-18 19:00:00+00:00,BETR801,pm25,36.5,µg/m³ +Antwerpen,BE,2019-05-18 18:00:00+00:00,BETR801,pm25,37.0,µg/m³ +Antwerpen,BE,2019-05-18 01:00:00+00:00,BETR801,pm25,24.0,µg/m³ +Antwerpen,BE,2019-05-17 01:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-16 01:00:00+00:00,BETR801,pm25,11.0,µg/m³ +Antwerpen,BE,2019-05-15 02:00:00+00:00,BETR801,pm25,12.5,µg/m³ +Antwerpen,BE,2019-05-15 01:00:00+00:00,BETR801,pm25,13.0,µg/m³ +Antwerpen,BE,2019-05-14 02:00:00+00:00,BETR801,pm25,4.0,µg/m³ +Antwerpen,BE,2019-05-14 01:00:00+00:00,BETR801,pm25,4.0,µg/m³ +Antwerpen,BE,2019-05-13 02:00:00+00:00,BETR801,pm25,5.5,µg/m³ +Antwerpen,BE,2019-05-13 01:00:00+00:00,BETR801,pm25,5.0,µg/m³ +Antwerpen,BE,2019-05-12 02:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-05-12 01:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-05-11 02:00:00+00:00,BETR801,pm25,19.5,µg/m³ +Antwerpen,BE,2019-05-11 01:00:00+00:00,BETR801,pm25,17.0,µg/m³ +Antwerpen,BE,2019-05-10 02:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-10 01:00:00+00:00,BETR801,pm25,11.5,µg/m³ +Antwerpen,BE,2019-05-09 02:00:00+00:00,BETR801,pm25,3.5,µg/m³ +Antwerpen,BE,2019-05-09 01:00:00+00:00,BETR801,pm25,4.5,µg/m³ +Antwerpen,BE,2019-05-08 02:00:00+00:00,BETR801,pm25,14.0,µg/m³ +Antwerpen,BE,2019-05-08 01:00:00+00:00,BETR801,pm25,14.5,µg/m³ +Antwerpen,BE,2019-05-07 02:00:00+00:00,BETR801,pm25,14.0,µg/m³ +Antwerpen,BE,2019-05-07 01:00:00+00:00,BETR801,pm25,12.5,µg/m³ +Antwerpen,BE,2019-05-06 02:00:00+00:00,BETR801,pm25,10.5,µg/m³ +Antwerpen,BE,2019-05-06 01:00:00+00:00,BETR801,pm25,10.0,µg/m³ +Antwerpen,BE,2019-05-05 02:00:00+00:00,BETR801,pm25,3.0,µg/m³ +Antwerpen,BE,2019-05-05 01:00:00+00:00,BETR801,pm25,5.0,µg/m³ +Antwerpen,BE,2019-05-04 02:00:00+00:00,BETR801,pm25,4.5,µg/m³ +Antwerpen,BE,2019-05-04 01:00:00+00:00,BETR801,pm25,4.0,µg/m³ +Antwerpen,BE,2019-05-03 02:00:00+00:00,BETR801,pm25,9.5,µg/m³ +Antwerpen,BE,2019-05-03 01:00:00+00:00,BETR801,pm25,8.5,µg/m³ +Antwerpen,BE,2019-05-02 02:00:00+00:00,BETR801,pm25,45.5,µg/m³ +Antwerpen,BE,2019-05-02 01:00:00+00:00,BETR801,pm25,46.0,µg/m³ +Antwerpen,BE,2019-05-01 02:00:00+00:00,BETR801,pm25,28.5,µg/m³ +Antwerpen,BE,2019-05-01 01:00:00+00:00,BETR801,pm25,34.5,µg/m³ +Antwerpen,BE,2019-04-30 02:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-04-30 01:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-04-29 02:00:00+00:00,BETR801,pm25,14.5,µg/m³ +Antwerpen,BE,2019-04-29 01:00:00+00:00,BETR801,pm25,14.0,µg/m³ 
+Antwerpen,BE,2019-04-28 02:00:00+00:00,BETR801,pm25,4.5,µg/m³ +Antwerpen,BE,2019-04-28 01:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-04-27 02:00:00+00:00,BETR801,pm25,7.0,µg/m³ +Antwerpen,BE,2019-04-27 01:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-04-26 02:00:00+00:00,BETR801,pm25,4.0,µg/m³ +Antwerpen,BE,2019-04-26 01:00:00+00:00,BETR801,pm25,4.5,µg/m³ +Antwerpen,BE,2019-04-25 02:00:00+00:00,BETR801,pm25,3.0,µg/m³ +Antwerpen,BE,2019-04-25 01:00:00+00:00,BETR801,pm25,3.0,µg/m³ +Antwerpen,BE,2019-04-24 02:00:00+00:00,BETR801,pm25,19.0,µg/m³ +Antwerpen,BE,2019-04-24 01:00:00+00:00,BETR801,pm25,19.0,µg/m³ +Antwerpen,BE,2019-04-23 02:00:00+00:00,BETR801,pm25,9.0,µg/m³ +Antwerpen,BE,2019-04-23 01:00:00+00:00,BETR801,pm25,9.0,µg/m³ +Antwerpen,BE,2019-04-22 02:00:00+00:00,BETR801,pm25,36.5,µg/m³ +Antwerpen,BE,2019-04-22 01:00:00+00:00,BETR801,pm25,32.5,µg/m³ +Antwerpen,BE,2019-04-21 02:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-04-21 01:00:00+00:00,BETR801,pm25,27.5,µg/m³ +Antwerpen,BE,2019-04-20 02:00:00+00:00,BETR801,pm25,20.0,µg/m³ +Antwerpen,BE,2019-04-20 01:00:00+00:00,BETR801,pm25,20.0,µg/m³ +Antwerpen,BE,2019-04-19 01:00:00+00:00,BETR801,pm25,20.0,µg/m³ +Antwerpen,BE,2019-04-18 02:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-04-18 01:00:00+00:00,BETR801,pm25,25.0,µg/m³ +Antwerpen,BE,2019-04-17 03:00:00+00:00,BETR801,pm25,9.0,µg/m³ +Antwerpen,BE,2019-04-17 02:00:00+00:00,BETR801,pm25,8.5,µg/m³ +Antwerpen,BE,2019-04-17 01:00:00+00:00,BETR801,pm25,8.0,µg/m³ +Antwerpen,BE,2019-04-16 02:00:00+00:00,BETR801,pm25,23.0,µg/m³ +Antwerpen,BE,2019-04-16 01:00:00+00:00,BETR801,pm25,24.0,µg/m³ +Antwerpen,BE,2019-04-15 15:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-04-15 14:00:00+00:00,BETR801,pm25,25.5,µg/m³ +Antwerpen,BE,2019-04-15 13:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-04-15 12:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-04-15 11:00:00+00:00,BETR801,pm25,26.0,µg/m³ +Antwerpen,BE,2019-04-15 10:00:00+00:00,BETR801,pm25,26.0,µg/m³ +Antwerpen,BE,2019-04-15 09:00:00+00:00,BETR801,pm25,21.5,µg/m³ +Antwerpen,BE,2019-04-15 08:00:00+00:00,BETR801,pm25,24.0,µg/m³ +Antwerpen,BE,2019-04-15 07:00:00+00:00,BETR801,pm25,24.0,µg/m³ +Antwerpen,BE,2019-04-15 06:00:00+00:00,BETR801,pm25,23.0,µg/m³ +Antwerpen,BE,2019-04-15 05:00:00+00:00,BETR801,pm25,23.0,µg/m³ +Antwerpen,BE,2019-04-15 04:00:00+00:00,BETR801,pm25,23.5,µg/m³ +Antwerpen,BE,2019-04-15 03:00:00+00:00,BETR801,pm25,24.5,µg/m³ +Antwerpen,BE,2019-04-15 02:00:00+00:00,BETR801,pm25,24.5,µg/m³ +Antwerpen,BE,2019-04-15 01:00:00+00:00,BETR801,pm25,25.5,µg/m³ +Antwerpen,BE,2019-04-12 02:00:00+00:00,BETR801,pm25,22.0,µg/m³ +Antwerpen,BE,2019-04-12 01:00:00+00:00,BETR801,pm25,22.0,µg/m³ +Antwerpen,BE,2019-04-11 02:00:00+00:00,BETR801,pm25,10.0,µg/m³ +Antwerpen,BE,2019-04-11 01:00:00+00:00,BETR801,pm25,9.0,µg/m³ +Antwerpen,BE,2019-04-10 02:00:00+00:00,BETR801,pm25,26.0,µg/m³ +Antwerpen,BE,2019-04-10 01:00:00+00:00,BETR801,pm25,24.5,µg/m³ +Antwerpen,BE,2019-04-09 13:00:00+00:00,BETR801,pm25,38.0,µg/m³ +Antwerpen,BE,2019-04-09 12:00:00+00:00,BETR801,pm25,41.5,µg/m³ +Antwerpen,BE,2019-04-09 11:00:00+00:00,BETR801,pm25,45.0,µg/m³ +Antwerpen,BE,2019-04-09 10:00:00+00:00,BETR801,pm25,44.5,µg/m³ +Antwerpen,BE,2019-04-09 09:00:00+00:00,BETR801,pm25,43.0,µg/m³ +Antwerpen,BE,2019-04-09 08:00:00+00:00,BETR801,pm25,44.0,µg/m³ +Antwerpen,BE,2019-04-09 07:00:00+00:00,BETR801,pm25,46.5,µg/m³ +Antwerpen,BE,2019-04-09 06:00:00+00:00,BETR801,pm25,52.5,µg/m³ +Antwerpen,BE,2019-04-09 
05:00:00+00:00,BETR801,pm25,68.0,µg/m³ +Antwerpen,BE,2019-04-09 04:00:00+00:00,BETR801,pm25,83.5,µg/m³ +Antwerpen,BE,2019-04-09 03:00:00+00:00,BETR801,pm25,99.0,µg/m³ +Antwerpen,BE,2019-04-09 02:00:00+00:00,BETR801,pm25,91.5,µg/m³ +Antwerpen,BE,2019-04-09 01:00:00+00:00,BETR801,pm25,76.0,µg/m³ +London,GB,2019-06-21 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-20 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-20 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-20 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-19 13:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-19 12:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-19 11:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-19 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-06-18 23:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-06-18 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-06-18 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-18 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 06:00:00+00:00,London 
Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-15 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 00:00:00+00:00,London 
Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-14 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-14 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-13 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-13 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-13 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 19:00:00+00:00,London 
Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 15:00:00+00:00,London 
Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 11:00:00+00:00,London 
Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-08 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-08 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-08 03:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-08 02:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-08 00:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 23:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 21:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 20:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 19:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 18:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 17:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 16:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 15:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 14:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 13:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 12:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 11:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 10:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 09:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 08:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 07:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-06-07 06:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-06-07 05:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-06-07 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-07 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-07 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-07 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-07 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-06 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-06 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 
07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-05 03:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-05 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-05 01:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 00:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 23:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ 
+London,GB,2019-06-04 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³
+London,GB,2019-06-04 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³
+London,GB,2019-06-04 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³
[... about 1,000 further rows of the same form — London,GB,<UTC timestamp>,London Westminster,pm25,<value>,µg/m³ — one row per hour with occasional gaps, descending from 2019-06-04 01:00:00+00:00 to 2019-04-20 09:00:00+00:00 ...]
+London,GB,2019-04-20 08:00:00+00:00,London Westminster,pm25,28.0,µg/m³
+London,GB,2019-04-20 07:00:00+00:00,London Westminster,pm25,28.0,µg/m³
+London,GB,2019-04-20 06:00:00+00:00,London 
Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-20 05:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-20 04:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 03:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 02:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 01:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 00:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-19 23:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-19 22:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-19 21:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-19 20:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-19 19:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-19 18:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-19 17:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-19 16:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-19 15:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-19 14:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-19 13:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-19 12:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-19 11:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-19 10:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-19 09:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-19 08:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-19 07:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-19 06:00:00+00:00,London Westminster,pm25,31.0,µg/m³ +London,GB,2019-04-19 05:00:00+00:00,London Westminster,pm25,32.0,µg/m³ +London,GB,2019-04-19 04:00:00+00:00,London Westminster,pm25,34.0,µg/m³ +London,GB,2019-04-19 03:00:00+00:00,London Westminster,pm25,35.0,µg/m³ +London,GB,2019-04-19 02:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-19 00:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-18 23:00:00+00:00,London Westminster,pm25,45.0,µg/m³ +London,GB,2019-04-18 22:00:00+00:00,London Westminster,pm25,47.0,µg/m³ +London,GB,2019-04-18 21:00:00+00:00,London Westminster,pm25,49.0,µg/m³ +London,GB,2019-04-18 20:00:00+00:00,London Westminster,pm25,50.0,µg/m³ +London,GB,2019-04-18 19:00:00+00:00,London Westminster,pm25,51.0,µg/m³ +London,GB,2019-04-18 18:00:00+00:00,London Westminster,pm25,51.0,µg/m³ +London,GB,2019-04-18 17:00:00+00:00,London Westminster,pm25,51.0,µg/m³ +London,GB,2019-04-18 16:00:00+00:00,London Westminster,pm25,52.0,µg/m³ +London,GB,2019-04-18 15:00:00+00:00,London Westminster,pm25,53.0,µg/m³ +London,GB,2019-04-18 14:00:00+00:00,London Westminster,pm25,53.0,µg/m³ +London,GB,2019-04-18 13:00:00+00:00,London Westminster,pm25,53.0,µg/m³ +London,GB,2019-04-18 12:00:00+00:00,London Westminster,pm25,54.0,µg/m³ +London,GB,2019-04-18 11:00:00+00:00,London Westminster,pm25,55.0,µg/m³ +London,GB,2019-04-18 10:00:00+00:00,London Westminster,pm25,55.0,µg/m³ +London,GB,2019-04-18 09:00:00+00:00,London Westminster,pm25,55.0,µg/m³ +London,GB,2019-04-18 08:00:00+00:00,London Westminster,pm25,55.0,µg/m³ +London,GB,2019-04-18 07:00:00+00:00,London Westminster,pm25,55.0,µg/m³ +London,GB,2019-04-18 06:00:00+00:00,London Westminster,pm25,54.0,µg/m³ +London,GB,2019-04-18 05:00:00+00:00,London Westminster,pm25,53.0,µg/m³ +London,GB,2019-04-18 04:00:00+00:00,London 
Westminster,pm25,52.0,µg/m³ +London,GB,2019-04-18 03:00:00+00:00,London Westminster,pm25,50.0,µg/m³ +London,GB,2019-04-18 02:00:00+00:00,London Westminster,pm25,48.0,µg/m³ +London,GB,2019-04-18 01:00:00+00:00,London Westminster,pm25,46.0,µg/m³ +London,GB,2019-04-18 00:00:00+00:00,London Westminster,pm25,44.0,µg/m³ +London,GB,2019-04-17 23:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-17 22:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-17 21:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-17 20:00:00+00:00,London Westminster,pm25,39.0,µg/m³ +London,GB,2019-04-17 19:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 18:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 17:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 16:00:00+00:00,London Westminster,pm25,37.0,µg/m³ +London,GB,2019-04-17 15:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 14:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 13:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 12:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 11:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 10:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 09:00:00+00:00,London Westminster,pm25,39.0,µg/m³ +London,GB,2019-04-17 08:00:00+00:00,London Westminster,pm25,39.0,µg/m³ +London,GB,2019-04-17 07:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-17 06:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-17 05:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-17 04:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-17 03:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-17 02:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-17 00:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 23:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 22:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 21:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 20:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 19:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 18:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 17:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 15:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-16 14:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-16 13:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-16 12:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-16 11:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-16 10:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-16 09:00:00+00:00,London Westminster,pm25,37.0,µg/m³ +London,GB,2019-04-16 08:00:00+00:00,London Westminster,pm25,36.0,µg/m³ +London,GB,2019-04-16 07:00:00+00:00,London Westminster,pm25,36.0,µg/m³ +London,GB,2019-04-16 06:00:00+00:00,London Westminster,pm25,35.0,µg/m³ +London,GB,2019-04-16 05:00:00+00:00,London Westminster,pm25,34.0,µg/m³ +London,GB,2019-04-16 04:00:00+00:00,London Westminster,pm25,32.0,µg/m³ +London,GB,2019-04-16 03:00:00+00:00,London Westminster,pm25,32.0,µg/m³ +London,GB,2019-04-16 02:00:00+00:00,London Westminster,pm25,31.0,µg/m³ +London,GB,2019-04-16 00:00:00+00:00,London 
Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-15 23:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-15 22:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-15 21:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-15 20:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-15 19:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-15 18:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-15 17:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-15 16:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-15 15:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-15 14:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-15 13:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-15 12:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-15 11:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-15 10:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-15 09:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-15 08:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-15 07:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-15 06:00:00+00:00,London Westminster,pm25,23.0,µg/m³ +London,GB,2019-04-15 05:00:00+00:00,London Westminster,pm25,22.0,µg/m³ +London,GB,2019-04-15 04:00:00+00:00,London Westminster,pm25,22.0,µg/m³ +London,GB,2019-04-15 03:00:00+00:00,London Westminster,pm25,21.0,µg/m³ +London,GB,2019-04-15 02:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-04-15 01:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-15 00:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-14 23:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-04-14 22:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-04-14 21:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-04-14 20:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-14 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-14 18:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 17:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 16:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 15:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 14:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 13:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 12:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 11:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 10:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 09:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 08:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 07:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 05:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 03:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-14 01:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 00:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-13 23:00:00+00:00,London 
Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 22:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 21:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 18:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 17:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 16:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 15:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 14:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 13:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 12:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 11:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 10:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 09:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 08:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 07:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 06:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 05:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 04:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 03:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 01:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 00:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 23:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 22:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 21:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 18:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 17:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 16:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 15:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 14:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 13:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 12:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 11:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 10:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 09:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 08:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 07:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 05:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-12 04:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-12 03:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-12 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-11 23:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-11 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 20:00:00+00:00,London 
Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-11 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-11 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-11 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-11 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-11 07:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 06:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 05:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 03:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-11 02:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-11 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-10 23:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-10 22:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-10 21:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-10 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-10 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-10 18:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-10 17:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-10 16:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-10 15:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-10 14:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-04-10 13:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-04-10 12:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-04-10 11:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-04-10 10:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-10 09:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-10 08:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-04-10 07:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-04-10 06:00:00+00:00,London Westminster,pm25,21.0,µg/m³ +London,GB,2019-04-10 05:00:00+00:00,London Westminster,pm25,22.0,µg/m³ +London,GB,2019-04-10 04:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-10 03:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-10 02:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-10 01:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-10 00:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-09 23:00:00+00:00,London Westminster,pm25,32.0,µg/m³ +London,GB,2019-04-09 22:00:00+00:00,London Westminster,pm25,34.0,µg/m³ +London,GB,2019-04-09 21:00:00+00:00,London Westminster,pm25,35.0,µg/m³ +London,GB,2019-04-09 20:00:00+00:00,London Westminster,pm25,36.0,µg/m³ +London,GB,2019-04-09 19:00:00+00:00,London Westminster,pm25,37.0,µg/m³ +London,GB,2019-04-09 18:00:00+00:00,London Westminster,pm25,38.0,µg/m³ 
+London,GB,2019-04-09 17:00:00+00:00,London Westminster,pm25,39.0,µg/m³ +London,GB,2019-04-09 16:00:00+00:00,London Westminster,pm25,39.0,µg/m³ +London,GB,2019-04-09 15:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-09 14:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-09 13:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-09 12:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-09 11:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-09 10:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-09 09:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-09 08:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-09 07:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-09 06:00:00+00:00,London Westminster,pm25,44.0,µg/m³ +London,GB,2019-04-09 05:00:00+00:00,London Westminster,pm25,44.0,µg/m³ +London,GB,2019-04-09 04:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-09 03:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-09 02:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +Paris,FR,2019-06-21 00:00:00+00:00,FR04014,no2,20.0,µg/m³ +Paris,FR,2019-06-20 23:00:00+00:00,FR04014,no2,21.8,µg/m³ +Paris,FR,2019-06-20 22:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-06-20 21:00:00+00:00,FR04014,no2,24.9,µg/m³ +Paris,FR,2019-06-20 20:00:00+00:00,FR04014,no2,21.4,µg/m³ +Paris,FR,2019-06-20 19:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-06-20 18:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-06-20 17:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-06-20 16:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-06-20 15:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-06-20 14:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-06-20 13:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-06-19 10:00:00+00:00,FR04014,no2,26.6,µg/m³ +Paris,FR,2019-06-19 09:00:00+00:00,FR04014,no2,27.3,µg/m³ +Paris,FR,2019-06-18 22:00:00+00:00,FR04014,no2,39.3,µg/m³ +Paris,FR,2019-06-18 21:00:00+00:00,FR04014,no2,23.1,µg/m³ +Paris,FR,2019-06-18 20:00:00+00:00,FR04014,no2,17.0,µg/m³ +Paris,FR,2019-06-18 19:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-06-18 08:00:00+00:00,FR04014,no2,49.6,µg/m³ +Paris,FR,2019-06-18 07:00:00+00:00,FR04014,no2,52.6,µg/m³ +Paris,FR,2019-06-18 06:00:00+00:00,FR04014,no2,51.4,µg/m³ +Paris,FR,2019-06-18 05:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-06-18 04:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-06-18 03:00:00+00:00,FR04014,no2,45.5,µg/m³ +Paris,FR,2019-06-18 02:00:00+00:00,FR04014,no2,39.8,µg/m³ +Paris,FR,2019-06-18 01:00:00+00:00,FR04014,no2,60.1,µg/m³ +Paris,FR,2019-06-18 00:00:00+00:00,FR04014,no2,66.2,µg/m³ +Paris,FR,2019-06-17 23:00:00+00:00,FR04014,no2,73.3,µg/m³ +Paris,FR,2019-06-17 22:00:00+00:00,FR04014,no2,51.0,µg/m³ +Paris,FR,2019-06-17 21:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-06-17 20:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-06-17 19:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-17 18:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-06-17 17:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-06-17 16:00:00+00:00,FR04014,no2,11.9,µg/m³ +Paris,FR,2019-06-17 15:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-17 14:00:00+00:00,FR04014,no2,11.5,µg/m³ +Paris,FR,2019-06-17 13:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-17 12:00:00+00:00,FR04014,no2,10.1,µg/m³ +Paris,FR,2019-06-17 11:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-17 
10:00:00+00:00,FR04014,no2,16.0,µg/m³ +Paris,FR,2019-06-17 09:00:00+00:00,FR04014,no2,30.4,µg/m³ +Paris,FR,2019-06-17 08:00:00+00:00,FR04014,no2,51.6,µg/m³ +Paris,FR,2019-06-17 07:00:00+00:00,FR04014,no2,54.4,µg/m³ +Paris,FR,2019-06-17 06:00:00+00:00,FR04014,no2,52.3,µg/m³ +Paris,FR,2019-06-17 05:00:00+00:00,FR04014,no2,44.8,µg/m³ +Paris,FR,2019-06-17 04:00:00+00:00,FR04014,no2,45.7,µg/m³ +Paris,FR,2019-06-17 03:00:00+00:00,FR04014,no2,49.1,µg/m³ +Paris,FR,2019-06-17 02:00:00+00:00,FR04014,no2,53.1,µg/m³ +Paris,FR,2019-06-17 01:00:00+00:00,FR04014,no2,58.8,µg/m³ +Paris,FR,2019-06-17 00:00:00+00:00,FR04014,no2,69.3,µg/m³ +Paris,FR,2019-06-16 23:00:00+00:00,FR04014,no2,67.3,µg/m³ +Paris,FR,2019-06-16 22:00:00+00:00,FR04014,no2,56.6,µg/m³ +Paris,FR,2019-06-16 21:00:00+00:00,FR04014,no2,42.7,µg/m³ +Paris,FR,2019-06-16 20:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-06-16 19:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-16 18:00:00+00:00,FR04014,no2,12.3,µg/m³ +Paris,FR,2019-06-16 17:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-16 16:00:00+00:00,FR04014,no2,9.2,µg/m³ +Paris,FR,2019-06-16 15:00:00+00:00,FR04014,no2,8.4,µg/m³ +Paris,FR,2019-06-16 14:00:00+00:00,FR04014,no2,8.1,µg/m³ +Paris,FR,2019-06-16 13:00:00+00:00,FR04014,no2,8.7,µg/m³ +Paris,FR,2019-06-16 12:00:00+00:00,FR04014,no2,11.2,µg/m³ +Paris,FR,2019-06-16 11:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-06-16 10:00:00+00:00,FR04014,no2,8.7,µg/m³ +Paris,FR,2019-06-16 09:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-06-16 08:00:00+00:00,FR04014,no2,9.9,µg/m³ +Paris,FR,2019-06-16 07:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-16 06:00:00+00:00,FR04014,no2,11.6,µg/m³ +Paris,FR,2019-06-16 05:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-16 04:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-16 03:00:00+00:00,FR04014,no2,11.2,µg/m³ +Paris,FR,2019-06-16 02:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-06-16 01:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-06-16 00:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-06-15 23:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-06-15 22:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-06-15 21:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-06-15 20:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-15 19:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-06-15 18:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-15 17:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-06-15 16:00:00+00:00,FR04014,no2,10.7,µg/m³ +Paris,FR,2019-06-15 15:00:00+00:00,FR04014,no2,10.5,µg/m³ +Paris,FR,2019-06-15 14:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-15 13:00:00+00:00,FR04014,no2,9.0,µg/m³ +Paris,FR,2019-06-15 12:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-06-15 11:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-06-15 10:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-06-15 09:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-15 08:00:00+00:00,FR04014,no2,17.6,µg/m³ +Paris,FR,2019-06-15 07:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-06-15 06:00:00+00:00,FR04014,no2,35.8,µg/m³ +Paris,FR,2019-06-15 02:00:00+00:00,FR04014,no2,33.9,µg/m³ +Paris,FR,2019-06-15 01:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-06-15 00:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-06-14 23:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-06-14 22:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-06-14 21:00:00+00:00,FR04014,no2,55.0,µg/m³ +Paris,FR,2019-06-14 20:00:00+00:00,FR04014,no2,41.9,µg/m³ +Paris,FR,2019-06-14 
19:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-06-14 18:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-06-14 17:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-06-14 16:00:00+00:00,FR04014,no2,18.9,µg/m³ +Paris,FR,2019-06-14 15:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-14 14:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-06-14 13:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-06-14 12:00:00+00:00,FR04014,no2,17.1,µg/m³ +Paris,FR,2019-06-14 11:00:00+00:00,FR04014,no2,21.8,µg/m³ +Paris,FR,2019-06-14 10:00:00+00:00,FR04014,no2,25.1,µg/m³ +Paris,FR,2019-06-14 09:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-06-14 08:00:00+00:00,FR04014,no2,34.3,µg/m³ +Paris,FR,2019-06-14 07:00:00+00:00,FR04014,no2,51.5,µg/m³ +Paris,FR,2019-06-14 06:00:00+00:00,FR04014,no2,64.3,µg/m³ +Paris,FR,2019-06-14 05:00:00+00:00,FR04014,no2,49.3,µg/m³ +Paris,FR,2019-06-14 04:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-06-14 03:00:00+00:00,FR04014,no2,48.5,µg/m³ +Paris,FR,2019-06-14 02:00:00+00:00,FR04014,no2,66.6,µg/m³ +Paris,FR,2019-06-14 01:00:00+00:00,FR04014,no2,68.1,µg/m³ +Paris,FR,2019-06-14 00:00:00+00:00,FR04014,no2,74.2,µg/m³ +Paris,FR,2019-06-13 23:00:00+00:00,FR04014,no2,78.3,µg/m³ +Paris,FR,2019-06-13 22:00:00+00:00,FR04014,no2,77.9,µg/m³ +Paris,FR,2019-06-13 21:00:00+00:00,FR04014,no2,58.8,µg/m³ +Paris,FR,2019-06-13 20:00:00+00:00,FR04014,no2,31.5,µg/m³ +Paris,FR,2019-06-13 19:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-06-13 18:00:00+00:00,FR04014,no2,24.0,µg/m³ +Paris,FR,2019-06-13 17:00:00+00:00,FR04014,no2,38.2,µg/m³ +Paris,FR,2019-06-13 16:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-06-13 15:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-06-13 14:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-06-13 13:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-06-13 12:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-06-13 11:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-06-13 10:00:00+00:00,FR04014,no2,24.5,µg/m³ +Paris,FR,2019-06-13 09:00:00+00:00,FR04014,no2,30.2,µg/m³ +Paris,FR,2019-06-13 08:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-06-13 07:00:00+00:00,FR04014,no2,40.9,µg/m³ +Paris,FR,2019-06-13 06:00:00+00:00,FR04014,no2,39.8,µg/m³ +Paris,FR,2019-06-13 05:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-06-13 04:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-06-13 03:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-06-13 02:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-06-13 01:00:00+00:00,FR04014,no2,18.7,µg/m³ +Paris,FR,2019-06-13 00:00:00+00:00,FR04014,no2,20.0,µg/m³ +Paris,FR,2019-06-12 23:00:00+00:00,FR04014,no2,26.9,µg/m³ +Paris,FR,2019-06-12 22:00:00+00:00,FR04014,no2,25.6,µg/m³ +Paris,FR,2019-06-12 21:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-06-12 20:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-06-12 19:00:00+00:00,FR04014,no2,23.4,µg/m³ +Paris,FR,2019-06-12 18:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-06-12 17:00:00+00:00,FR04014,no2,24.2,µg/m³ +Paris,FR,2019-06-12 16:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-06-12 15:00:00+00:00,FR04014,no2,16.8,µg/m³ +Paris,FR,2019-06-12 14:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-06-12 13:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-06-12 12:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-06-12 11:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-06-12 10:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-06-12 09:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-06-12 08:00:00+00:00,FR04014,no2,35.5,µg/m³ +Paris,FR,2019-06-12 
07:00:00+00:00,FR04014,no2,44.4,µg/m³ +Paris,FR,2019-06-12 06:00:00+00:00,FR04014,no2,38.4,µg/m³ +Paris,FR,2019-06-12 05:00:00+00:00,FR04014,no2,42.7,µg/m³ +Paris,FR,2019-06-12 04:00:00+00:00,FR04014,no2,44.9,µg/m³ +Paris,FR,2019-06-12 03:00:00+00:00,FR04014,no2,36.3,µg/m³ +Paris,FR,2019-06-12 02:00:00+00:00,FR04014,no2,34.7,µg/m³ +Paris,FR,2019-06-12 01:00:00+00:00,FR04014,no2,41.9,µg/m³ +Paris,FR,2019-06-12 00:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-06-11 23:00:00+00:00,FR04014,no2,41.5,µg/m³ +Paris,FR,2019-06-11 22:00:00+00:00,FR04014,no2,59.4,µg/m³ +Paris,FR,2019-06-11 21:00:00+00:00,FR04014,no2,54.1,µg/m³ +Paris,FR,2019-06-11 20:00:00+00:00,FR04014,no2,42.7,µg/m³ +Paris,FR,2019-06-11 19:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-06-11 18:00:00+00:00,FR04014,no2,44.6,µg/m³ +Paris,FR,2019-06-11 17:00:00+00:00,FR04014,no2,35.5,µg/m³ +Paris,FR,2019-06-11 16:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-06-11 15:00:00+00:00,FR04014,no2,19.8,µg/m³ +Paris,FR,2019-06-11 14:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-06-11 13:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-11 12:00:00+00:00,FR04014,no2,12.6,µg/m³ +Paris,FR,2019-06-11 11:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-06-11 10:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-06-11 09:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-06-11 08:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-06-11 07:00:00+00:00,FR04014,no2,58.0,µg/m³ +Paris,FR,2019-06-11 06:00:00+00:00,FR04014,no2,55.4,µg/m³ +Paris,FR,2019-06-11 05:00:00+00:00,FR04014,no2,58.7,µg/m³ +Paris,FR,2019-06-11 04:00:00+00:00,FR04014,no2,52.7,µg/m³ +Paris,FR,2019-06-11 03:00:00+00:00,FR04014,no2,32.3,µg/m³ +Paris,FR,2019-06-11 02:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-06-11 01:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-06-11 00:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-06-10 23:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-06-10 22:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-06-10 21:00:00+00:00,FR04014,no2,23.5,µg/m³ +Paris,FR,2019-06-10 20:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-06-10 19:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-06-10 18:00:00+00:00,FR04014,no2,18.4,µg/m³ +Paris,FR,2019-06-10 17:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-06-10 16:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-06-10 15:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-10 14:00:00+00:00,FR04014,no2,9.5,µg/m³ +Paris,FR,2019-06-10 13:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-10 12:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-10 11:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-06-10 10:00:00+00:00,FR04014,no2,14.1,µg/m³ +Paris,FR,2019-06-10 09:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-06-10 08:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-06-10 07:00:00+00:00,FR04014,no2,23.0,µg/m³ +Paris,FR,2019-06-10 06:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-06-10 05:00:00+00:00,FR04014,no2,21.3,µg/m³ +Paris,FR,2019-06-10 04:00:00+00:00,FR04014,no2,13.7,µg/m³ +Paris,FR,2019-06-10 03:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-06-10 02:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-06-10 01:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-06-10 00:00:00+00:00,FR04014,no2,28.1,µg/m³ +Paris,FR,2019-06-09 23:00:00+00:00,FR04014,no2,39.9,µg/m³ +Paris,FR,2019-06-09 22:00:00+00:00,FR04014,no2,37.1,µg/m³ +Paris,FR,2019-06-09 21:00:00+00:00,FR04014,no2,30.9,µg/m³ +Paris,FR,2019-06-09 20:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-06-09 
19:00:00+00:00,FR04014,no2,30.6,µg/m³ +Paris,FR,2019-06-09 18:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-09 17:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-06-09 16:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-06-09 15:00:00+00:00,FR04014,no2,7.2,µg/m³ +Paris,FR,2019-06-09 14:00:00+00:00,FR04014,no2,7.9,µg/m³ +Paris,FR,2019-06-09 13:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-09 12:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-06-09 11:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-06-09 10:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-06-09 09:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-06-09 08:00:00+00:00,FR04014,no2,30.2,µg/m³ +Paris,FR,2019-06-09 07:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-06-09 06:00:00+00:00,FR04014,no2,36.7,µg/m³ +Paris,FR,2019-06-09 05:00:00+00:00,FR04014,no2,42.2,µg/m³ +Paris,FR,2019-06-09 04:00:00+00:00,FR04014,no2,43.0,µg/m³ +Paris,FR,2019-06-09 03:00:00+00:00,FR04014,no2,51.5,µg/m³ +Paris,FR,2019-06-09 02:00:00+00:00,FR04014,no2,51.2,µg/m³ +Paris,FR,2019-06-09 01:00:00+00:00,FR04014,no2,41.0,µg/m³ +Paris,FR,2019-06-09 00:00:00+00:00,FR04014,no2,55.9,µg/m³ +Paris,FR,2019-06-08 23:00:00+00:00,FR04014,no2,47.0,µg/m³ +Paris,FR,2019-06-08 22:00:00+00:00,FR04014,no2,34.8,µg/m³ +Paris,FR,2019-06-08 21:00:00+00:00,FR04014,no2,36.7,µg/m³ +Paris,FR,2019-06-08 18:00:00+00:00,FR04014,no2,22.0,µg/m³ +Paris,FR,2019-06-08 17:00:00+00:00,FR04014,no2,14.8,µg/m³ +Paris,FR,2019-06-08 16:00:00+00:00,FR04014,no2,14.1,µg/m³ +Paris,FR,2019-06-08 15:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-08 14:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-06-08 13:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-06-08 12:00:00+00:00,FR04014,no2,9.2,µg/m³ +Paris,FR,2019-06-08 11:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-06-08 10:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-06-08 09:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-08 08:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-06-08 07:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-08 06:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-06-08 05:00:00+00:00,FR04014,no2,14.1,µg/m³ +Paris,FR,2019-06-08 04:00:00+00:00,FR04014,no2,10.7,µg/m³ +Paris,FR,2019-06-08 03:00:00+00:00,FR04014,no2,9.8,µg/m³ +Paris,FR,2019-06-08 02:00:00+00:00,FR04014,no2,8.4,µg/m³ +Paris,FR,2019-06-08 01:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-08 00:00:00+00:00,FR04014,no2,11.3,µg/m³ +Paris,FR,2019-06-07 23:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-07 22:00:00+00:00,FR04014,no2,14.7,µg/m³ +Paris,FR,2019-06-07 21:00:00+00:00,FR04014,no2,16.3,µg/m³ +Paris,FR,2019-06-07 20:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-06-07 19:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-06-07 18:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-06-07 17:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-07 16:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-07 15:00:00+00:00,FR04014,no2,15.6,µg/m³ +Paris,FR,2019-06-07 14:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-07 13:00:00+00:00,FR04014,no2,15.0,µg/m³ +Paris,FR,2019-06-07 12:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-06-07 11:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-06-07 10:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-06-07 09:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-06-07 08:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-06-07 07:00:00+00:00,FR04014,no2,23.0,µg/m³ +Paris,FR,2019-06-07 06:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-06-06 
14:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-06-06 13:00:00+00:00,FR04014,no2,16.0,µg/m³ +Paris,FR,2019-06-06 12:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-06-06 11:00:00+00:00,FR04014,no2,16.4,µg/m³ +Paris,FR,2019-06-06 10:00:00+00:00,FR04014,no2,21.2,µg/m³ +Paris,FR,2019-06-06 09:00:00+00:00,FR04014,no2,26.0,µg/m³ +Paris,FR,2019-06-06 08:00:00+00:00,FR04014,no2,36.0,µg/m³ +Paris,FR,2019-06-06 07:00:00+00:00,FR04014,no2,43.1,µg/m³ +Paris,FR,2019-06-06 06:00:00+00:00,FR04014,no2,40.5,µg/m³ +Paris,FR,2019-06-06 05:00:00+00:00,FR04014,no2,40.3,µg/m³ +Paris,FR,2019-06-06 04:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-06-06 03:00:00+00:00,FR04014,no2,19.2,µg/m³ +Paris,FR,2019-06-06 02:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-06 01:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-06-06 00:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-06-05 23:00:00+00:00,FR04014,no2,31.8,µg/m³ +Paris,FR,2019-06-05 22:00:00+00:00,FR04014,no2,30.3,µg/m³ +Paris,FR,2019-06-05 21:00:00+00:00,FR04014,no2,33.7,µg/m³ +Paris,FR,2019-06-05 20:00:00+00:00,FR04014,no2,37.5,µg/m³ +Paris,FR,2019-06-05 19:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-06-05 18:00:00+00:00,FR04014,no2,40.8,µg/m³ +Paris,FR,2019-06-05 17:00:00+00:00,FR04014,no2,48.8,µg/m³ +Paris,FR,2019-06-05 16:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-06-05 15:00:00+00:00,FR04014,no2,53.5,µg/m³ +Paris,FR,2019-06-05 14:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-06-05 13:00:00+00:00,FR04014,no2,33.6,µg/m³ +Paris,FR,2019-06-05 12:00:00+00:00,FR04014,no2,47.2,µg/m³ +Paris,FR,2019-06-05 11:00:00+00:00,FR04014,no2,59.0,µg/m³ +Paris,FR,2019-06-05 10:00:00+00:00,FR04014,no2,42.1,µg/m³ +Paris,FR,2019-06-05 09:00:00+00:00,FR04014,no2,36.8,µg/m³ +Paris,FR,2019-06-05 08:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-06-05 07:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-06-05 06:00:00+00:00,FR04014,no2,35.8,µg/m³ +Paris,FR,2019-06-05 05:00:00+00:00,FR04014,no2,39.2,µg/m³ +Paris,FR,2019-06-05 04:00:00+00:00,FR04014,no2,24.5,µg/m³ +Paris,FR,2019-06-05 03:00:00+00:00,FR04014,no2,16.2,µg/m³ +Paris,FR,2019-06-05 02:00:00+00:00,FR04014,no2,12.4,µg/m³ +Paris,FR,2019-06-05 01:00:00+00:00,FR04014,no2,10.8,µg/m³ +Paris,FR,2019-06-05 00:00:00+00:00,FR04014,no2,15.7,µg/m³ +Paris,FR,2019-06-04 23:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-06-04 22:00:00+00:00,FR04014,no2,33.5,µg/m³ +Paris,FR,2019-06-04 21:00:00+00:00,FR04014,no2,26.3,µg/m³ +Paris,FR,2019-06-04 20:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-06-04 19:00:00+00:00,FR04014,no2,17.0,µg/m³ +Paris,FR,2019-06-04 18:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-06-04 17:00:00+00:00,FR04014,no2,23.4,µg/m³ +Paris,FR,2019-06-04 16:00:00+00:00,FR04014,no2,26.3,µg/m³ +Paris,FR,2019-06-04 15:00:00+00:00,FR04014,no2,21.5,µg/m³ +Paris,FR,2019-06-04 14:00:00+00:00,FR04014,no2,18.1,µg/m³ +Paris,FR,2019-06-04 13:00:00+00:00,FR04014,no2,17.4,µg/m³ +Paris,FR,2019-06-04 12:00:00+00:00,FR04014,no2,17.7,µg/m³ +Paris,FR,2019-06-04 11:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-06-04 10:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-06-04 09:00:00+00:00,FR04014,no2,38.5,µg/m³ +Paris,FR,2019-06-04 08:00:00+00:00,FR04014,no2,50.8,µg/m³ +Paris,FR,2019-06-04 07:00:00+00:00,FR04014,no2,53.5,µg/m³ +Paris,FR,2019-06-04 06:00:00+00:00,FR04014,no2,47.7,µg/m³ +Paris,FR,2019-06-04 05:00:00+00:00,FR04014,no2,36.5,µg/m³ +Paris,FR,2019-06-04 04:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-06-04 03:00:00+00:00,FR04014,no2,41.6,µg/m³ +Paris,FR,2019-06-04 
02:00:00+00:00,FR04014,no2,35.0,µg/m³ +Paris,FR,2019-06-04 01:00:00+00:00,FR04014,no2,43.9,µg/m³ +Paris,FR,2019-06-04 00:00:00+00:00,FR04014,no2,52.4,µg/m³ +Paris,FR,2019-06-03 23:00:00+00:00,FR04014,no2,44.6,µg/m³ +Paris,FR,2019-06-03 22:00:00+00:00,FR04014,no2,30.5,µg/m³ +Paris,FR,2019-06-03 21:00:00+00:00,FR04014,no2,31.1,µg/m³ +Paris,FR,2019-06-03 20:00:00+00:00,FR04014,no2,33.0,µg/m³ +Paris,FR,2019-06-03 19:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-06-03 18:00:00+00:00,FR04014,no2,23.1,µg/m³ +Paris,FR,2019-06-03 17:00:00+00:00,FR04014,no2,24.4,µg/m³ +Paris,FR,2019-06-03 16:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-06-03 15:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-06-03 14:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-06-03 13:00:00+00:00,FR04014,no2,25.8,µg/m³ +Paris,FR,2019-06-03 12:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-06-03 11:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-06-03 10:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-06-03 09:00:00+00:00,FR04014,no2,46.0,µg/m³ +Paris,FR,2019-06-03 08:00:00+00:00,FR04014,no2,43.9,µg/m³ +Paris,FR,2019-06-03 07:00:00+00:00,FR04014,no2,50.0,µg/m³ +Paris,FR,2019-06-03 06:00:00+00:00,FR04014,no2,44.1,µg/m³ +Paris,FR,2019-06-03 05:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-06-03 04:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-06-03 03:00:00+00:00,FR04014,no2,9.8,µg/m³ +Paris,FR,2019-06-03 02:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-03 01:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-03 00:00:00+00:00,FR04014,no2,15.7,µg/m³ +Paris,FR,2019-06-02 23:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-06-02 22:00:00+00:00,FR04014,no2,27.6,µg/m³ +Paris,FR,2019-06-02 21:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-06-02 20:00:00+00:00,FR04014,no2,40.9,µg/m³ +Paris,FR,2019-06-02 19:00:00+00:00,FR04014,no2,25.8,µg/m³ +Paris,FR,2019-06-02 18:00:00+00:00,FR04014,no2,15.6,µg/m³ +Paris,FR,2019-06-02 17:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-02 16:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-02 15:00:00+00:00,FR04014,no2,13.9,µg/m³ +Paris,FR,2019-06-02 14:00:00+00:00,FR04014,no2,15.0,µg/m³ +Paris,FR,2019-06-02 13:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-06-02 12:00:00+00:00,FR04014,no2,11.5,µg/m³ +Paris,FR,2019-06-02 11:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-02 10:00:00+00:00,FR04014,no2,18.1,µg/m³ +Paris,FR,2019-06-02 09:00:00+00:00,FR04014,no2,21.0,µg/m³ +Paris,FR,2019-06-02 08:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-02 07:00:00+00:00,FR04014,no2,18.1,µg/m³ +Paris,FR,2019-06-02 06:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-06-02 05:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-06-02 04:00:00+00:00,FR04014,no2,24.5,µg/m³ +Paris,FR,2019-06-02 03:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-06-02 02:00:00+00:00,FR04014,no2,39.2,µg/m³ +Paris,FR,2019-06-02 01:00:00+00:00,FR04014,no2,38.2,µg/m³ +Paris,FR,2019-06-02 00:00:00+00:00,FR04014,no2,38.1,µg/m³ +Paris,FR,2019-06-01 23:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-06-01 22:00:00+00:00,FR04014,no2,48.1,µg/m³ +Paris,FR,2019-06-01 21:00:00+00:00,FR04014,no2,49.4,µg/m³ +Paris,FR,2019-06-01 20:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-06-01 19:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-06-01 18:00:00+00:00,FR04014,no2,14.5,µg/m³ +Paris,FR,2019-06-01 17:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-01 16:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-01 15:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-01 
14:00:00+00:00,FR04014,no2,10.0,µg/m³ +Paris,FR,2019-06-01 13:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-01 12:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-06-01 11:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-06-01 10:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-06-01 09:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-06-01 08:00:00+00:00,FR04014,no2,33.3,µg/m³ +Paris,FR,2019-06-01 07:00:00+00:00,FR04014,no2,46.4,µg/m³ +Paris,FR,2019-06-01 06:00:00+00:00,FR04014,no2,44.6,µg/m³ +Paris,FR,2019-06-01 02:00:00+00:00,FR04014,no2,68.1,µg/m³ +Paris,FR,2019-06-01 01:00:00+00:00,FR04014,no2,74.8,µg/m³ +Paris,FR,2019-06-01 00:00:00+00:00,FR04014,no2,84.7,µg/m³ +Paris,FR,2019-05-31 23:00:00+00:00,FR04014,no2,81.7,µg/m³ +Paris,FR,2019-05-31 22:00:00+00:00,FR04014,no2,68.0,µg/m³ +Paris,FR,2019-05-31 21:00:00+00:00,FR04014,no2,60.2,µg/m³ +Paris,FR,2019-05-31 20:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-05-31 19:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-05-31 18:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-31 17:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-31 16:00:00+00:00,FR04014,no2,16.3,µg/m³ +Paris,FR,2019-05-31 15:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-05-31 14:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-05-31 13:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-05-31 12:00:00+00:00,FR04014,no2,13.3,µg/m³ +Paris,FR,2019-05-31 11:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-05-31 10:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-05-31 09:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-05-31 08:00:00+00:00,FR04014,no2,36.6,µg/m³ +Paris,FR,2019-05-31 07:00:00+00:00,FR04014,no2,47.4,µg/m³ +Paris,FR,2019-05-31 06:00:00+00:00,FR04014,no2,38.6,µg/m³ +Paris,FR,2019-05-31 05:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-05-31 04:00:00+00:00,FR04014,no2,31.1,µg/m³ +Paris,FR,2019-05-31 03:00:00+00:00,FR04014,no2,40.1,µg/m³ +Paris,FR,2019-05-31 02:00:00+00:00,FR04014,no2,44.1,µg/m³ +Paris,FR,2019-05-31 01:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-05-31 00:00:00+00:00,FR04014,no2,27.2,µg/m³ +Paris,FR,2019-05-30 23:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-05-30 22:00:00+00:00,FR04014,no2,27.0,µg/m³ +Paris,FR,2019-05-30 21:00:00+00:00,FR04014,no2,26.9,µg/m³ +Paris,FR,2019-05-30 20:00:00+00:00,FR04014,no2,21.9,µg/m³ +Paris,FR,2019-05-30 19:00:00+00:00,FR04014,no2,22.9,µg/m³ +Paris,FR,2019-05-30 18:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-30 17:00:00+00:00,FR04014,no2,20.4,µg/m³ +Paris,FR,2019-05-30 16:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-30 15:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-05-30 14:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-30 13:00:00+00:00,FR04014,no2,16.1,µg/m³ +Paris,FR,2019-05-30 12:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-05-30 11:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-30 10:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-05-30 09:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-05-30 08:00:00+00:00,FR04014,no2,16.7,µg/m³ +Paris,FR,2019-05-30 07:00:00+00:00,FR04014,no2,18.3,µg/m³ +Paris,FR,2019-05-30 06:00:00+00:00,FR04014,no2,13.3,µg/m³ +Paris,FR,2019-05-30 05:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-05-30 04:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-05-30 03:00:00+00:00,FR04014,no2,10.6,µg/m³ +Paris,FR,2019-05-30 02:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-05-30 01:00:00+00:00,FR04014,no2,12.4,µg/m³ +Paris,FR,2019-05-30 00:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-05-29 
23:00:00+00:00,FR04014,no2,19.9,µg/m³
[... several thousand further "+" data lines of the added CSV file elided: hourly records of the form city,country,date.utc,location,parameter,value,unit — no2 measurements in µg/m³ for stations FR04014 (Paris, FR) and BETR801 (Antwerpen, BE), spanning late April through mid June 2019 ...]
03:00:00+00:00,BETR801,no2,9.0,µg/m³ +Antwerpen,BE,2019-05-20 02:00:00+00:00,BETR801,no2,10.5,µg/m³ +Antwerpen,BE,2019-05-20 01:00:00+00:00,BETR801,no2,17.0,µg/m³ +Antwerpen,BE,2019-05-20 00:00:00+00:00,BETR801,no2,26.0,µg/m³ +Antwerpen,BE,2019-05-19 23:00:00+00:00,BETR801,no2,16.5,µg/m³ +Antwerpen,BE,2019-05-19 22:00:00+00:00,BETR801,no2,18.5,µg/m³ +Antwerpen,BE,2019-05-19 21:00:00+00:00,BETR801,no2,12.5,µg/m³ +Antwerpen,BE,2019-05-19 20:00:00+00:00,BETR801,no2,15.0,µg/m³ +Antwerpen,BE,2019-05-19 19:00:00+00:00,BETR801,no2,26.0,µg/m³ +Antwerpen,BE,2019-05-19 18:00:00+00:00,BETR801,no2,15.5,µg/m³ +Antwerpen,BE,2019-05-19 17:00:00+00:00,BETR801,no2,18.5,µg/m³ +Antwerpen,BE,2019-05-19 16:00:00+00:00,BETR801,no2,17.5,µg/m³ +Antwerpen,BE,2019-05-19 15:00:00+00:00,BETR801,no2,33.0,µg/m³ +Antwerpen,BE,2019-05-19 14:00:00+00:00,BETR801,no2,23.0,µg/m³ +Antwerpen,BE,2019-05-19 13:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-19 12:00:00+00:00,BETR801,no2,16.0,µg/m³ +Antwerpen,BE,2019-05-19 11:00:00+00:00,BETR801,no2,17.0,µg/m³ +Antwerpen,BE,2019-05-19 10:00:00+00:00,BETR801,no2,17.5,µg/m³ +Antwerpen,BE,2019-05-19 09:00:00+00:00,BETR801,no2,16.0,µg/m³ +Antwerpen,BE,2019-05-19 08:00:00+00:00,BETR801,no2,23.5,µg/m³ +Antwerpen,BE,2019-05-19 07:00:00+00:00,BETR801,no2,30.0,µg/m³ +Antwerpen,BE,2019-05-19 06:00:00+00:00,BETR801,no2,30.5,µg/m³ +Antwerpen,BE,2019-05-19 05:00:00+00:00,BETR801,no2,26.0,µg/m³ +Antwerpen,BE,2019-05-19 04:00:00+00:00,BETR801,no2,21.0,µg/m³ +Antwerpen,BE,2019-05-19 03:00:00+00:00,BETR801,no2,19.0,µg/m³ +Antwerpen,BE,2019-05-19 02:00:00+00:00,BETR801,no2,19.0,µg/m³ +Antwerpen,BE,2019-05-19 01:00:00+00:00,BETR801,no2,22.5,µg/m³ +Antwerpen,BE,2019-05-19 00:00:00+00:00,BETR801,no2,23.5,µg/m³ +Antwerpen,BE,2019-05-18 23:00:00+00:00,BETR801,no2,29.5,µg/m³ +Antwerpen,BE,2019-05-18 22:00:00+00:00,BETR801,no2,34.5,µg/m³ +Antwerpen,BE,2019-05-18 21:00:00+00:00,BETR801,no2,39.0,µg/m³ +Antwerpen,BE,2019-05-18 20:00:00+00:00,BETR801,no2,40.0,µg/m³ +Antwerpen,BE,2019-05-18 19:00:00+00:00,BETR801,no2,35.5,µg/m³ +Antwerpen,BE,2019-05-18 18:00:00+00:00,BETR801,no2,35.5,µg/m³ +Antwerpen,BE,2019-05-18 01:00:00+00:00,BETR801,no2,41.5,µg/m³ +Antwerpen,BE,2019-05-16 01:00:00+00:00,BETR801,no2,28.0,µg/m³ +Antwerpen,BE,2019-05-15 02:00:00+00:00,BETR801,no2,22.5,µg/m³ +Antwerpen,BE,2019-05-15 01:00:00+00:00,BETR801,no2,25.5,µg/m³ +Antwerpen,BE,2019-05-14 02:00:00+00:00,BETR801,no2,11.5,µg/m³ +Antwerpen,BE,2019-05-14 01:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-13 02:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-13 01:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-12 02:00:00+00:00,BETR801,no2,20.0,µg/m³ +Antwerpen,BE,2019-05-12 01:00:00+00:00,BETR801,no2,17.5,µg/m³ +Antwerpen,BE,2019-05-11 02:00:00+00:00,BETR801,no2,21.0,µg/m³ +Antwerpen,BE,2019-05-11 01:00:00+00:00,BETR801,no2,26.5,µg/m³ +Antwerpen,BE,2019-05-10 02:00:00+00:00,BETR801,no2,11.5,µg/m³ +Antwerpen,BE,2019-05-10 01:00:00+00:00,BETR801,no2,10.5,µg/m³ +Antwerpen,BE,2019-05-09 02:00:00+00:00,BETR801,no2,20.5,µg/m³ +Antwerpen,BE,2019-05-09 01:00:00+00:00,BETR801,no2,20.0,µg/m³ +Antwerpen,BE,2019-05-08 02:00:00+00:00,BETR801,no2,20.5,µg/m³ +Antwerpen,BE,2019-05-08 01:00:00+00:00,BETR801,no2,23.0,µg/m³ +Antwerpen,BE,2019-05-07 02:00:00+00:00,BETR801,no2,45.0,µg/m³ +Antwerpen,BE,2019-05-07 01:00:00+00:00,BETR801,no2,50.5,µg/m³ +Antwerpen,BE,2019-05-06 02:00:00+00:00,BETR801,no2,27.0,µg/m³ +Antwerpen,BE,2019-05-06 01:00:00+00:00,BETR801,no2,30.0,µg/m³ +Antwerpen,BE,2019-05-05 
02:00:00+00:00,BETR801,no2,13.0,µg/m³ +Antwerpen,BE,2019-05-05 01:00:00+00:00,BETR801,no2,18.0,µg/m³ +Antwerpen,BE,2019-05-04 02:00:00+00:00,BETR801,no2,9.5,µg/m³ +Antwerpen,BE,2019-05-04 01:00:00+00:00,BETR801,no2,8.5,µg/m³ +Antwerpen,BE,2019-05-03 02:00:00+00:00,BETR801,no2,25.5,µg/m³ +Antwerpen,BE,2019-05-03 01:00:00+00:00,BETR801,no2,14.0,µg/m³ +Antwerpen,BE,2019-05-02 02:00:00+00:00,BETR801,no2,36.5,µg/m³ +Antwerpen,BE,2019-05-02 01:00:00+00:00,BETR801,no2,31.0,µg/m³ +Antwerpen,BE,2019-05-01 02:00:00+00:00,BETR801,no2,12.0,µg/m³ +Antwerpen,BE,2019-05-01 01:00:00+00:00,BETR801,no2,12.5,µg/m³ +Antwerpen,BE,2019-04-30 02:00:00+00:00,BETR801,no2,9.0,µg/m³ +Antwerpen,BE,2019-04-30 01:00:00+00:00,BETR801,no2,15.0,µg/m³ +Antwerpen,BE,2019-04-29 02:00:00+00:00,BETR801,no2,52.5,µg/m³ +Antwerpen,BE,2019-04-29 01:00:00+00:00,BETR801,no2,72.5,µg/m³ +Antwerpen,BE,2019-04-28 02:00:00+00:00,BETR801,no2,10.5,µg/m³ +Antwerpen,BE,2019-04-28 01:00:00+00:00,BETR801,no2,8.5,µg/m³ +Antwerpen,BE,2019-04-27 02:00:00+00:00,BETR801,no2,14.0,µg/m³ +Antwerpen,BE,2019-04-27 01:00:00+00:00,BETR801,no2,22.0,µg/m³ +Antwerpen,BE,2019-04-26 02:00:00+00:00,BETR801,no2,15.0,µg/m³ +Antwerpen,BE,2019-04-26 01:00:00+00:00,BETR801,no2,25.5,µg/m³ +Antwerpen,BE,2019-04-25 02:00:00+00:00,BETR801,no2,12.0,µg/m³ +Antwerpen,BE,2019-04-25 01:00:00+00:00,BETR801,no2,13.0,µg/m³ +Antwerpen,BE,2019-04-22 01:00:00+00:00,BETR801,no2,24.5,µg/m³ +Antwerpen,BE,2019-04-21 02:00:00+00:00,BETR801,no2,15.0,µg/m³ +Antwerpen,BE,2019-04-21 01:00:00+00:00,BETR801,no2,18.0,µg/m³ +Antwerpen,BE,2019-04-19 01:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-04-18 02:00:00+00:00,BETR801,no2,35.0,µg/m³ +Antwerpen,BE,2019-04-17 03:00:00+00:00,BETR801,no2,38.5,µg/m³ +Antwerpen,BE,2019-04-17 02:00:00+00:00,BETR801,no2,33.0,µg/m³ +Antwerpen,BE,2019-04-17 01:00:00+00:00,BETR801,no2,33.0,µg/m³ +Antwerpen,BE,2019-04-16 02:00:00+00:00,BETR801,no2,21.5,µg/m³ +Antwerpen,BE,2019-04-16 01:00:00+00:00,BETR801,no2,27.5,µg/m³ +Antwerpen,BE,2019-04-15 15:00:00+00:00,BETR801,no2,32.0,µg/m³ +Antwerpen,BE,2019-04-15 14:00:00+00:00,BETR801,no2,28.0,µg/m³ +Antwerpen,BE,2019-04-15 13:00:00+00:00,BETR801,no2,31.0,µg/m³ +Antwerpen,BE,2019-04-15 12:00:00+00:00,BETR801,no2,29.5,µg/m³ +Antwerpen,BE,2019-04-15 11:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-04-15 10:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-04-15 09:00:00+00:00,BETR801,no2,29.5,µg/m³ +Antwerpen,BE,2019-04-15 08:00:00+00:00,BETR801,no2,43.5,µg/m³ +Antwerpen,BE,2019-04-15 07:00:00+00:00,BETR801,no2,54.0,µg/m³ +Antwerpen,BE,2019-04-15 06:00:00+00:00,BETR801,no2,64.0,µg/m³ +Antwerpen,BE,2019-04-15 05:00:00+00:00,BETR801,no2,63.0,µg/m³ +Antwerpen,BE,2019-04-15 04:00:00+00:00,BETR801,no2,49.0,µg/m³ +Antwerpen,BE,2019-04-15 03:00:00+00:00,BETR801,no2,36.5,µg/m³ +Antwerpen,BE,2019-04-15 02:00:00+00:00,BETR801,no2,32.0,µg/m³ +Antwerpen,BE,2019-04-15 01:00:00+00:00,BETR801,no2,30.5,µg/m³ +Antwerpen,BE,2019-04-12 02:00:00+00:00,BETR801,no2,22.5,µg/m³ +Antwerpen,BE,2019-04-12 01:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-04-11 02:00:00+00:00,BETR801,no2,14.0,µg/m³ +Antwerpen,BE,2019-04-11 01:00:00+00:00,BETR801,no2,13.5,µg/m³ +Antwerpen,BE,2019-04-10 02:00:00+00:00,BETR801,no2,11.5,µg/m³ +Antwerpen,BE,2019-04-10 01:00:00+00:00,BETR801,no2,13.5,µg/m³ +Antwerpen,BE,2019-04-09 13:00:00+00:00,BETR801,no2,27.5,µg/m³ +Antwerpen,BE,2019-04-09 12:00:00+00:00,BETR801,no2,30.0,µg/m³ +Antwerpen,BE,2019-04-09 11:00:00+00:00,BETR801,no2,28.5,µg/m³ +Antwerpen,BE,2019-04-09 
10:00:00+00:00,BETR801,no2,33.5,µg/m³ +Antwerpen,BE,2019-04-09 09:00:00+00:00,BETR801,no2,35.0,µg/m³ +Antwerpen,BE,2019-04-09 08:00:00+00:00,BETR801,no2,39.0,µg/m³ +Antwerpen,BE,2019-04-09 07:00:00+00:00,BETR801,no2,38.5,µg/m³ +Antwerpen,BE,2019-04-09 06:00:00+00:00,BETR801,no2,50.0,µg/m³ +Antwerpen,BE,2019-04-09 05:00:00+00:00,BETR801,no2,46.5,µg/m³ +Antwerpen,BE,2019-04-09 04:00:00+00:00,BETR801,no2,34.5,µg/m³ +Antwerpen,BE,2019-04-09 03:00:00+00:00,BETR801,no2,54.5,µg/m³ +Antwerpen,BE,2019-04-09 02:00:00+00:00,BETR801,no2,53.5,µg/m³ +Antwerpen,BE,2019-04-09 01:00:00+00:00,BETR801,no2,22.5,µg/m³ +London,GB,2019-06-17 11:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 10:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 09:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 08:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-17 07:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-17 06:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-17 05:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 04:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 03:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-17 02:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-17 01:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-17 00:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-16 23:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-16 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-16 20:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-16 19:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-16 18:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-16 17:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-16 16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-16 15:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-16 14:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-16 13:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-16 12:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-16 11:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-16 10:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-16 09:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-16 08:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-16 07:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-16 06:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-16 05:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-16 04:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-16 03:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-16 02:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-16 01:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-16 00:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-15 23:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-15 22:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-15 21:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-15 20:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-15 19:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-15 18:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-15 17:00:00+00:00,London 
Westminster,no2,20.0,µg/m³ +London,GB,2019-06-15 16:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-15 15:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-15 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-15 13:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-15 12:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-15 11:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-15 10:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-15 09:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-15 08:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-15 07:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-15 06:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-15 05:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-15 04:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-15 00:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-14 20:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 19:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 18:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-14 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 16:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-14 15:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-14 14:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-14 13:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-14 12:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-14 11:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-14 10:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-14 09:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-14 08:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-14 07:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-14 06:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-14 05:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-14 04:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-14 03:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-14 02:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-14 00:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-13 23:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 22:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 21:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 20:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-13 19:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-13 18:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-13 17:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 16:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-13 15:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-13 14:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 13:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 12:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 11:00:00+00:00,London 
Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 10:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-13 09:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 08:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 07:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-13 06:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 05:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 04:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-13 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-13 00:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-12 23:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-12 21:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-12 20:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-06-12 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-12 18:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-12 17:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-12 16:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-12 15:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-06-12 14:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-12 13:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-12 12:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-12 11:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-12 10:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-12 09:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-12 08:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-12 07:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-12 06:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-12 05:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-12 04:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-12 03:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-12 00:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-11 23:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-11 22:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-11 21:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 20:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 19:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-06-11 18:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-11 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-06-11 16:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-11 15:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-11 14:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-11 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-11 12:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-11 11:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 10:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 09:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-11 08:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-11 07:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-11 06:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-11 05:00:00+00:00,London 
Westminster,no2,13.0,µg/m³ +London,GB,2019-06-11 04:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-11 03:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-11 02:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-11 01:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-11 00:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-10 23:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-10 22:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-10 21:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 20:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-10 19:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-10 18:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-10 17:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-10 16:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-10 15:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-06-10 14:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-06-10 13:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-06-10 12:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-10 11:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-06-10 10:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-10 09:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-10 08:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-10 07:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-10 06:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 05:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 04:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 03:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-10 02:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-10 01:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-10 00:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-09 23:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-09 21:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-09 20:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-09 19:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-09 18:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-09 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-09 16:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-09 15:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-09 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-09 13:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-09 12:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-09 11:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-09 10:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-09 09:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-09 08:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-09 07:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-09 06:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-09 05:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-09 04:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-09 03:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-09 02:00:00+00:00,London 
Westminster,no2,7.0,µg/m³ +London,GB,2019-06-09 01:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-09 00:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-08 23:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-08 21:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-08 20:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-08 19:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-08 18:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-08 17:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-08 16:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-08 15:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-08 14:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-08 13:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-08 12:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-08 11:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-08 10:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-08 09:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-08 08:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-08 07:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-08 06:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-08 05:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-08 04:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-08 03:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-08 02:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-08 00:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-07 23:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-07 21:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-07 20:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-07 19:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-07 18:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-07 16:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-07 15:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-07 14:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-07 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-07 12:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 11:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-07 10:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 09:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-07 08:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-07 07:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 06:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-07 05:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 04:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 03:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 02:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-06 23:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-06 22:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-06 21:00:00+00:00,London 
Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 20:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-06 19:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 18:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 16:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-06 15:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-06 14:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-06 13:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-06 12:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-06 11:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-06 10:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-06 09:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-06 08:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-06 07:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-06 06:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-06 05:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-06 04:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-06 03:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-06 02:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-06 00:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-05 23:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-05 22:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-05 21:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 20:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 19:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 18:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 17:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 16:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 15:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-05 14:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-05 13:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-05 12:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-05 11:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-05 10:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-05 09:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-05 08:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-05 07:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-05 06:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-05 05:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-05 04:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-05 03:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-05 02:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-05 01:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-05 00:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-04 23:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-04 22:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-04 21:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-04 20:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-04 19:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-04 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ 
+London,GB,2019-06-04 17:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-06-04 16:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-06-04 15:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-06-04 14:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-04 13:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-06-04 12:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-04 11:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-04 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-04 09:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-04 08:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-04 07:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-04 06:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-04 05:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-04 04:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-04 03:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-04 02:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-04 01:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-04 00:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-03 23:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-03 22:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-03 21:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-03 20:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-03 19:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-03 18:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-03 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-03 16:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-03 15:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-03 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-03 13:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-03 12:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-03 11:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-03 10:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-03 09:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-03 08:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-03 07:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-03 06:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-03 05:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-03 04:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-03 03:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-03 02:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-03 01:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-03 00:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-02 23:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-02 22:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-02 21:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-02 20:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 19:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-02 18:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-02 17:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ 
+London,GB,2019-06-02 15:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-02 14:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-02 13:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-02 12:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-02 11:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-02 10:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-02 09:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 08:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-06-02 07:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 06:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 05:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-06-02 04:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-06-02 03:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-02 02:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-02 01:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-06-02 00:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-06-01 23:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-06-01 22:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-06-01 21:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-06-01 20:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-06-01 19:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-06-01 18:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-01 17:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-01 16:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-01 15:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-01 14:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-01 13:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-01 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-01 11:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-01 10:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-01 09:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-01 08:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-01 07:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-01 06:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-01 05:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-01 04:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-01 03:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-01 02:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-01 01:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-01 00:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-31 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-31 20:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-31 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-31 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-31 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 16:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 15:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-31 14:00:00+00:00,London Westminster,no2,21.0,µg/m³ 
+London,GB,2019-05-31 13:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-31 12:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-31 11:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-31 10:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-31 09:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-31 08:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-05-31 07:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-31 06:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-05-31 05:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-31 04:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-31 03:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-05-31 02:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-05-31 01:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-31 00:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-30 23:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-30 22:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-30 21:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-30 20:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-30 19:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-30 18:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-30 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-30 16:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-30 15:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-30 14:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-30 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-30 12:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-30 11:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-05-30 10:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-30 09:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-30 08:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-05-30 07:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-05-30 06:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 05:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 04:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 03:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 02:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 01:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-05-30 00:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-05-29 23:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 22:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 21:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-05-29 20:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-05-29 19:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 18:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 17:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 16:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-05-29 15:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 14:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-29 13:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-05-29 12:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-29 
11:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-29 10:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-29 09:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-29 08:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-29 07:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-29 06:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-29 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-29 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-29 03:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-29 02:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-29 01:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-29 00:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-28 23:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-28 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-28 20:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 19:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 17:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-28 16:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-28 15:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-28 14:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-28 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-28 11:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-28 10:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-28 09:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-28 08:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-28 07:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-28 06:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-28 05:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-28 04:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-28 03:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-28 02:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-28 01:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-28 00:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-27 23:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 22:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 21:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-27 20:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-27 19:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 18:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 17:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 16:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 15:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 14:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-27 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-27 12:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-27 11:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-27 10:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-27 09:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 
08:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 07:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 06:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 03:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-27 02:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-27 01:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-27 00:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 21:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-26 20:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-26 19:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 18:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-26 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 16:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-26 15:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 14:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-26 13:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-26 12:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-26 11:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-26 10:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-26 09:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-26 08:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-26 07:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-26 06:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-26 05:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-26 04:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-26 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 01:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-26 00:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-25 23:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-25 22:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-25 21:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-25 20:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-25 19:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-25 18:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-25 17:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-25 16:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-25 15:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-25 14:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-25 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-25 12:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-25 11:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-25 10:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-25 09:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-25 08:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-25 07:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-25 
06:00:00+00:00,London Westminster,no2,22.0,µg/m³
+London,GB,2019-05-25 05:00:00+00:00,London Westminster,no2,21.0,µg/m³
+London,GB,2019-05-25 04:00:00+00:00,London Westminster,no2,21.0,µg/m³
[... CSV data file continues: one `+`-prefixed row per hourly London Westminster no2 reading (city,country,date.utc,location,parameter,value,unit), descending from 2019-05-25 to 2019-04-09, with occasional missing hours as in the source ...]
+London,GB,2019-04-09 19:00:00+00:00,London Westminster,no2,41.0,µg/m³
+London,GB,2019-04-09 18:00:00+00:00,London Westminster,no2,42.0,µg/m³
+London,GB,2019-04-09 
17:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-04-09 16:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-09 15:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-04-09 14:00:00+00:00,London Westminster,no2,58.0,µg/m³ +London,GB,2019-04-09 13:00:00+00:00,London Westminster,no2,56.0,µg/m³ +London,GB,2019-04-09 12:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-09 11:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-09 10:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-09 09:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-04-09 08:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-04-09 07:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-09 06:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-09 05:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-09 04:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-09 03:00:00+00:00,London Westminster,no2,67.0,µg/m³ +London,GB,2019-04-09 02:00:00+00:00,London Westminster,no2,67.0,µg/m³ diff --git a/doc/data/air_quality_no2.csv b/doc/data/air_quality_no2.csv new file mode 100644 index 0000000000000..7fa879f7c7e78 --- /dev/null +++ b/doc/data/air_quality_no2.csv @@ -0,0 +1,1036 @@ +datetime,station_antwerp,station_paris,station_london +2019-05-07 02:00:00,,,23.0 +2019-05-07 03:00:00,50.5,25.0,19.0 +2019-05-07 04:00:00,45.0,27.7,19.0 +2019-05-07 05:00:00,,50.4,16.0 +2019-05-07 06:00:00,,61.9, +2019-05-07 07:00:00,,72.4,26.0 +2019-05-07 08:00:00,,77.7,32.0 +2019-05-07 09:00:00,,67.9,32.0 +2019-05-07 10:00:00,,56.0,28.0 +2019-05-07 11:00:00,,34.5,21.0 +2019-05-07 12:00:00,,20.1,21.0 +2019-05-07 13:00:00,,13.0,18.0 +2019-05-07 14:00:00,,10.6,20.0 +2019-05-07 15:00:00,,13.2,18.0 +2019-05-07 16:00:00,,11.0,20.0 +2019-05-07 17:00:00,,11.7,20.0 +2019-05-07 18:00:00,,18.2,21.0 +2019-05-07 19:00:00,,22.3,20.0 +2019-05-07 20:00:00,,21.4,20.0 +2019-05-07 21:00:00,,26.8,24.0 +2019-05-07 22:00:00,,36.2,24.0 +2019-05-07 23:00:00,,33.9, +2019-05-08 00:00:00,,35.8,24.0 +2019-05-08 01:00:00,,34.0,19.0 +2019-05-08 02:00:00,,22.1,19.0 +2019-05-08 03:00:00,23.0,19.6,20.0 +2019-05-08 04:00:00,20.5,15.3,20.0 +2019-05-08 05:00:00,,13.5,19.0 +2019-05-08 06:00:00,,15.5,19.0 +2019-05-08 07:00:00,,19.3,29.0 +2019-05-08 08:00:00,,21.7,34.0 +2019-05-08 09:00:00,,19.5,36.0 +2019-05-08 10:00:00,,17.0,33.0 +2019-05-08 11:00:00,,19.7,28.0 +2019-05-08 12:00:00,,33.4,27.0 +2019-05-08 13:00:00,,21.4,26.0 +2019-05-08 14:00:00,,15.1,26.0 +2019-05-08 15:00:00,,14.3,24.0 +2019-05-08 16:00:00,,25.3,27.0 +2019-05-08 17:00:00,,26.0,28.0 +2019-05-08 18:00:00,,38.6,31.0 +2019-05-08 19:00:00,,29.3,40.0 +2019-05-08 20:00:00,,27.8,25.0 +2019-05-08 21:00:00,,41.3,29.0 +2019-05-08 22:00:00,,38.3,26.0 +2019-05-08 23:00:00,,48.9, +2019-05-09 00:00:00,,32.2,25.0 +2019-05-09 01:00:00,,25.2,30.0 +2019-05-09 02:00:00,,14.7, +2019-05-09 03:00:00,20.0,10.6,31.0 +2019-05-09 04:00:00,20.5,10.0,31.0 +2019-05-09 05:00:00,,10.4,33.0 +2019-05-09 06:00:00,,15.3,33.0 +2019-05-09 07:00:00,,34.5,33.0 +2019-05-09 08:00:00,,50.7,33.0 +2019-05-09 09:00:00,,49.0,35.0 +2019-05-09 10:00:00,,32.2,36.0 +2019-05-09 11:00:00,,32.3,28.0 +2019-05-09 12:00:00,,43.1,27.0 +2019-05-09 13:00:00,,34.2,30.0 +2019-05-09 14:00:00,,35.1,27.0 +2019-05-09 15:00:00,,21.3,34.0 +2019-05-09 16:00:00,,24.6,97.0 +2019-05-09 17:00:00,,23.9,67.0 +2019-05-09 18:00:00,,27.0,60.0 +2019-05-09 19:00:00,,29.9,58.0 +2019-05-09 20:00:00,,24.4,62.0 +2019-05-09 21:00:00,,23.8,59.0 
+2019-05-09 22:00:00,,29.2,65.0 +2019-05-09 23:00:00,,34.5,59.0 +2019-05-10 00:00:00,,29.7,59.0 +2019-05-10 01:00:00,,26.7,52.0 +2019-05-10 02:00:00,,22.7,52.0 +2019-05-10 03:00:00,10.5,19.1,41.0 +2019-05-10 04:00:00,11.5,14.1,41.0 +2019-05-10 05:00:00,,15.0,40.0 +2019-05-10 06:00:00,,20.5,40.0 +2019-05-10 07:00:00,,37.8,39.0 +2019-05-10 08:00:00,,47.4,36.0 +2019-05-10 09:00:00,,57.3,39.0 +2019-05-10 10:00:00,,60.7,34.0 +2019-05-10 11:00:00,,53.4,31.0 +2019-05-10 12:00:00,,35.1,29.0 +2019-05-10 13:00:00,,23.2,28.0 +2019-05-10 14:00:00,,25.3,26.0 +2019-05-10 15:00:00,,22.0,25.0 +2019-05-10 16:00:00,,29.3,25.0 +2019-05-10 17:00:00,,29.6,24.0 +2019-05-10 18:00:00,,30.8,26.0 +2019-05-10 19:00:00,,37.8,26.0 +2019-05-10 20:00:00,,33.4,29.0 +2019-05-10 21:00:00,,39.3,29.0 +2019-05-10 22:00:00,,43.6,29.0 +2019-05-10 23:00:00,,37.0,31.0 +2019-05-11 00:00:00,,28.1,31.0 +2019-05-11 01:00:00,,26.0,27.0 +2019-05-11 02:00:00,,24.8,27.0 +2019-05-11 03:00:00,26.5,15.5,32.0 +2019-05-11 04:00:00,21.0,14.9,32.0 +2019-05-11 05:00:00,,,35.0 +2019-05-11 06:00:00,,,35.0 +2019-05-11 07:00:00,,,30.0 +2019-05-11 08:00:00,,28.9,30.0 +2019-05-11 09:00:00,,29.0,27.0 +2019-05-11 10:00:00,,32.1,30.0 +2019-05-11 11:00:00,,35.7, +2019-05-11 12:00:00,,36.8, +2019-05-11 13:00:00,,33.2, +2019-05-11 14:00:00,,30.2, +2019-05-11 15:00:00,,30.8, +2019-05-11 16:00:00,,17.8,28.0 +2019-05-11 17:00:00,,18.0,26.0 +2019-05-11 18:00:00,,19.5,28.0 +2019-05-11 19:00:00,,32.0,31.0 +2019-05-11 20:00:00,,33.1,33.0 +2019-05-11 21:00:00,,31.2,33.0 +2019-05-11 22:00:00,,24.2,34.0 +2019-05-11 23:00:00,,21.1,37.0 +2019-05-12 00:00:00,,27.7,37.0 +2019-05-12 01:00:00,,26.4,35.0 +2019-05-12 02:00:00,,22.8,35.0 +2019-05-12 03:00:00,17.5,19.2,38.0 +2019-05-12 04:00:00,20.0,17.2,38.0 +2019-05-12 05:00:00,,16.0,36.0 +2019-05-12 06:00:00,,16.2,36.0 +2019-05-12 07:00:00,,19.2,38.0 +2019-05-12 08:00:00,,20.1,44.0 +2019-05-12 09:00:00,,15.9,32.0 +2019-05-12 10:00:00,,14.6,26.0 +2019-05-12 11:00:00,,11.7,26.0 +2019-05-12 12:00:00,,11.4,21.0 +2019-05-12 13:00:00,,11.4,20.0 +2019-05-12 14:00:00,,10.9,19.0 +2019-05-12 15:00:00,,8.7,21.0 +2019-05-12 16:00:00,,9.1,22.0 +2019-05-12 17:00:00,,9.6,23.0 +2019-05-12 18:00:00,,11.7,24.0 +2019-05-12 19:00:00,,13.9,22.0 +2019-05-12 20:00:00,,18.2,22.0 +2019-05-12 21:00:00,,19.5,22.0 +2019-05-12 22:00:00,,24.1,21.0 +2019-05-12 23:00:00,,34.2,22.0 +2019-05-13 00:00:00,,46.5,22.0 +2019-05-13 01:00:00,,32.5,22.0 +2019-05-13 02:00:00,,25.0,22.0 +2019-05-13 03:00:00,14.5,18.9,24.0 +2019-05-13 04:00:00,14.5,18.5,24.0 +2019-05-13 05:00:00,,18.9,33.0 +2019-05-13 06:00:00,,25.1,33.0 +2019-05-13 07:00:00,,38.3,39.0 +2019-05-13 08:00:00,,45.2,39.0 +2019-05-13 09:00:00,,41.0,31.0 +2019-05-13 10:00:00,,32.1,29.0 +2019-05-13 11:00:00,,20.6,27.0 +2019-05-13 12:00:00,,12.8,26.0 +2019-05-13 13:00:00,,9.6,24.0 +2019-05-13 14:00:00,,9.2,25.0 +2019-05-13 15:00:00,,10.1,26.0 +2019-05-13 16:00:00,,10.7,28.0 +2019-05-13 17:00:00,,10.6,29.0 +2019-05-13 18:00:00,,12.1,30.0 +2019-05-13 19:00:00,,13.0,30.0 +2019-05-13 20:00:00,,15.5,31.0 +2019-05-13 21:00:00,,23.9,31.0 +2019-05-13 22:00:00,,28.3,31.0 +2019-05-13 23:00:00,,30.4,31.0 +2019-05-14 00:00:00,,27.3,31.0 +2019-05-14 01:00:00,,22.8,23.0 +2019-05-14 02:00:00,,20.9,23.0 +2019-05-14 03:00:00,14.5,19.1,26.0 +2019-05-14 04:00:00,11.5,19.0,26.0 +2019-05-14 05:00:00,,22.1,30.0 +2019-05-14 06:00:00,,31.6,30.0 +2019-05-14 07:00:00,,38.6,33.0 +2019-05-14 08:00:00,,46.1,34.0 +2019-05-14 09:00:00,,41.3,33.0 +2019-05-14 10:00:00,,28.8,30.0 +2019-05-14 11:00:00,,19.0,31.0 +2019-05-14 12:00:00,,12.9,27.0 
+2019-05-14 13:00:00,,11.3,25.0 +2019-05-14 14:00:00,,10.2,25.0 +2019-05-14 15:00:00,,11.0,25.0 +2019-05-14 16:00:00,,15.2,29.0 +2019-05-14 17:00:00,,13.4,32.0 +2019-05-14 18:00:00,,15.3,33.0 +2019-05-14 19:00:00,,17.7,30.0 +2019-05-14 20:00:00,,17.9,28.0 +2019-05-14 21:00:00,,23.3,27.0 +2019-05-14 22:00:00,,28.4,25.0 +2019-05-14 23:00:00,,29.0,26.0 +2019-05-15 00:00:00,,30.9,26.0 +2019-05-15 01:00:00,,24.3,22.0 +2019-05-15 02:00:00,,18.8, +2019-05-15 03:00:00,25.5,17.2,22.0 +2019-05-15 04:00:00,22.5,16.8,22.0 +2019-05-15 05:00:00,,17.9,25.0 +2019-05-15 06:00:00,,28.9,25.0 +2019-05-15 07:00:00,,46.5,33.0 +2019-05-15 08:00:00,,48.1,33.0 +2019-05-15 09:00:00,,32.1,34.0 +2019-05-15 10:00:00,,25.7,35.0 +2019-05-15 11:00:00,,0.0,36.0 +2019-05-15 12:00:00,,0.0,35.0 +2019-05-15 13:00:00,,0.0,30.0 +2019-05-15 14:00:00,,9.4,31.0 +2019-05-15 15:00:00,,10.0,30.0 +2019-05-15 16:00:00,,11.9,38.0 +2019-05-15 17:00:00,,12.9,38.0 +2019-05-15 18:00:00,,12.2,33.0 +2019-05-15 19:00:00,,12.9,35.0 +2019-05-15 20:00:00,,16.5,33.0 +2019-05-15 21:00:00,,20.3,31.0 +2019-05-15 22:00:00,,30.1,32.0 +2019-05-15 23:00:00,,36.0,33.0 +2019-05-16 00:00:00,,44.1,33.0 +2019-05-16 01:00:00,,30.9,33.0 +2019-05-16 02:00:00,,27.4,33.0 +2019-05-16 03:00:00,28.0,26.0,28.0 +2019-05-16 04:00:00,,26.7,28.0 +2019-05-16 05:00:00,,27.9,26.0 +2019-05-16 06:00:00,,37.0,26.0 +2019-05-16 07:00:00,,52.6,33.0 +2019-05-16 08:00:00,,,34.0 +2019-05-16 09:00:00,,40.0,33.0 +2019-05-16 10:00:00,,39.4,32.0 +2019-05-16 11:00:00,,29.5,31.0 +2019-05-16 12:00:00,,13.5,33.0 +2019-05-16 13:00:00,,10.5,30.0 +2019-05-16 14:00:00,,9.2,27.0 +2019-05-16 15:00:00,,8.5,27.0 +2019-05-16 16:00:00,,8.1,26.0 +2019-05-16 17:00:00,,10.1,29.0 +2019-05-16 18:00:00,,10.3,30.0 +2019-05-16 19:00:00,,13.5,25.0 +2019-05-16 20:00:00,,15.9,27.0 +2019-05-16 21:00:00,,14.4,26.0 +2019-05-16 22:00:00,,24.8,25.0 +2019-05-16 23:00:00,,24.3,25.0 +2019-05-17 00:00:00,,37.1,25.0 +2019-05-17 01:00:00,,43.7,23.0 +2019-05-17 02:00:00,,46.3,23.0 +2019-05-17 03:00:00,,26.1,21.0 +2019-05-17 04:00:00,,24.6,21.0 +2019-05-17 05:00:00,,26.6,21.0 +2019-05-17 06:00:00,,28.4,21.0 +2019-05-17 07:00:00,,34.0,25.0 +2019-05-17 08:00:00,,46.3,27.0 +2019-05-17 09:00:00,,55.0,27.0 +2019-05-17 10:00:00,,57.5,29.0 +2019-05-17 11:00:00,,60.5,30.0 +2019-05-17 12:00:00,,51.5,30.0 +2019-05-17 13:00:00,,43.1,30.0 +2019-05-17 14:00:00,,46.5,29.0 +2019-05-17 15:00:00,,37.9,31.0 +2019-05-17 16:00:00,,27.0,32.0 +2019-05-17 17:00:00,,22.2,30.0 +2019-05-17 18:00:00,,20.7,29.0 +2019-05-17 19:00:00,,27.9,31.0 +2019-05-17 20:00:00,,33.6,36.0 +2019-05-17 21:00:00,,24.7,36.0 +2019-05-17 22:00:00,,23.5,36.0 +2019-05-17 23:00:00,,24.3,35.0 +2019-05-18 00:00:00,,28.2,35.0 +2019-05-18 01:00:00,,34.1,31.0 +2019-05-18 02:00:00,,31.5,31.0 +2019-05-18 03:00:00,41.5,37.4,31.0 +2019-05-18 04:00:00,,29.0,31.0 +2019-05-18 05:00:00,,16.1,29.0 +2019-05-18 06:00:00,,16.6,29.0 +2019-05-18 07:00:00,,20.1,27.0 +2019-05-18 08:00:00,,22.1,29.0 +2019-05-18 09:00:00,,27.4,35.0 +2019-05-18 10:00:00,,20.4,32.0 +2019-05-18 11:00:00,,21.1,35.0 +2019-05-18 12:00:00,,24.1,34.0 +2019-05-18 13:00:00,,17.5,38.0 +2019-05-18 14:00:00,,12.9,29.0 +2019-05-18 15:00:00,,10.5,27.0 +2019-05-18 16:00:00,,11.8,28.0 +2019-05-18 17:00:00,,13.0,30.0 +2019-05-18 18:00:00,,14.6,42.0 +2019-05-18 19:00:00,,12.8,42.0 +2019-05-18 20:00:00,35.5,14.5,36.0 +2019-05-18 21:00:00,35.5,67.5,35.0 +2019-05-18 22:00:00,40.0,36.2,41.0 +2019-05-18 23:00:00,39.0,59.3,46.0 +2019-05-19 00:00:00,34.5,62.5,46.0 +2019-05-19 01:00:00,29.5,50.2,49.0 +2019-05-19 02:00:00,23.5,49.6,49.0 
+2019-05-19 03:00:00,22.5,34.9,49.0 +2019-05-19 04:00:00,19.0,38.1,49.0 +2019-05-19 05:00:00,19.0,36.4,49.0 +2019-05-19 06:00:00,21.0,39.4,49.0 +2019-05-19 07:00:00,26.0,40.9,38.0 +2019-05-19 08:00:00,30.5,31.1,36.0 +2019-05-19 09:00:00,30.0,32.4,33.0 +2019-05-19 10:00:00,23.5,31.7,30.0 +2019-05-19 11:00:00,16.0,33.0,27.0 +2019-05-19 12:00:00,17.5,31.0,28.0 +2019-05-19 13:00:00,17.0,32.6,25.0 +2019-05-19 14:00:00,16.0,27.9,27.0 +2019-05-19 15:00:00,14.5,21.0,31.0 +2019-05-19 16:00:00,23.0,23.8,29.0 +2019-05-19 17:00:00,33.0,31.7,28.0 +2019-05-19 18:00:00,17.5,32.5,27.0 +2019-05-19 19:00:00,18.5,33.9,29.0 +2019-05-19 20:00:00,15.5,32.7,30.0 +2019-05-19 21:00:00,26.0,51.2,32.0 +2019-05-19 22:00:00,15.0,35.6,32.0 +2019-05-19 23:00:00,12.5,23.2,32.0 +2019-05-20 00:00:00,18.5,22.2,32.0 +2019-05-20 01:00:00,16.5,18.8,28.0 +2019-05-20 02:00:00,26.0,16.4,28.0 +2019-05-20 03:00:00,17.0,12.8,32.0 +2019-05-20 04:00:00,10.5,12.1,32.0 +2019-05-20 05:00:00,9.0,12.6,26.0 +2019-05-20 06:00:00,14.0,14.9,26.0 +2019-05-20 07:00:00,20.0,25.2,31.0 +2019-05-20 08:00:00,26.0,40.1,31.0 +2019-05-20 09:00:00,38.0,46.9,29.0 +2019-05-20 10:00:00,40.0,46.1,29.0 +2019-05-20 11:00:00,30.5,45.5,28.0 +2019-05-20 12:00:00,25.0,43.9,28.0 +2019-05-20 13:00:00,25.0,35.4,28.0 +2019-05-20 14:00:00,34.5,23.8,29.0 +2019-05-20 15:00:00,32.0,23.7,32.0 +2019-05-20 16:00:00,24.5,27.5,32.0 +2019-05-20 17:00:00,25.5,26.5,29.0 +2019-05-20 18:00:00,,32.4,30.0 +2019-05-20 19:00:00,,24.6,33.0 +2019-05-20 20:00:00,,32.2,32.0 +2019-05-20 21:00:00,,21.3,32.0 +2019-05-20 22:00:00,,21.6,34.0 +2019-05-20 23:00:00,,20.3,47.0 +2019-05-21 00:00:00,,20.7,47.0 +2019-05-21 01:00:00,,19.6,35.0 +2019-05-21 02:00:00,,16.9,35.0 +2019-05-21 03:00:00,15.5,16.3,26.0 +2019-05-21 04:00:00,,17.7,26.0 +2019-05-21 05:00:00,,17.9,23.0 +2019-05-21 06:00:00,,18.5,23.0 +2019-05-21 07:00:00,,38.0,30.0 +2019-05-21 08:00:00,,62.6,27.0 +2019-05-21 09:00:00,,56.0,28.0 +2019-05-21 10:00:00,,54.2,29.0 +2019-05-21 11:00:00,,48.1,29.0 +2019-05-21 12:00:00,,30.4,26.0 +2019-05-21 13:00:00,,25.5,26.0 +2019-05-21 14:00:00,,30.5,28.0 +2019-05-21 15:00:00,,49.7,33.0 +2019-05-21 16:00:00,,47.8,34.0 +2019-05-21 17:00:00,,36.6,34.0 +2019-05-21 18:00:00,,42.3,37.0 +2019-05-21 19:00:00,,75.0,35.0 +2019-05-21 20:00:00,,54.3,40.0 +2019-05-21 21:00:00,,50.0,38.0 +2019-05-21 22:00:00,,40.8,33.0 +2019-05-21 23:00:00,,43.0,33.0 +2019-05-22 00:00:00,,33.2,33.0 +2019-05-22 01:00:00,,29.5,30.0 +2019-05-22 02:00:00,,27.1,30.0 +2019-05-22 03:00:00,20.5,27.9,27.0 +2019-05-22 04:00:00,,19.2,27.0 +2019-05-22 05:00:00,,25.2,21.0 +2019-05-22 06:00:00,,33.7,21.0 +2019-05-22 07:00:00,,45.1,28.0 +2019-05-22 08:00:00,,75.7,29.0 +2019-05-22 09:00:00,,75.4,31.0 +2019-05-22 10:00:00,,70.8,31.0 +2019-05-22 11:00:00,,63.1,31.0 +2019-05-22 12:00:00,,57.8,28.0 +2019-05-22 13:00:00,,42.6,25.0 +2019-05-22 14:00:00,,42.2,25.0 +2019-05-22 15:00:00,,38.5,28.0 +2019-05-22 16:00:00,,40.0,30.0 +2019-05-22 17:00:00,,33.2,32.0 +2019-05-22 18:00:00,,34.9,34.0 +2019-05-22 19:00:00,,36.1,34.0 +2019-05-22 20:00:00,,34.1,33.0 +2019-05-22 21:00:00,,36.2,33.0 +2019-05-22 22:00:00,,44.9,31.0 +2019-05-22 23:00:00,,37.7,32.0 +2019-05-23 00:00:00,,29.8,32.0 +2019-05-23 01:00:00,,62.1,23.0 +2019-05-23 02:00:00,,53.3,23.0 +2019-05-23 03:00:00,60.5,53.1,20.0 +2019-05-23 04:00:00,,66.6,20.0 +2019-05-23 05:00:00,,76.8,19.0 +2019-05-23 06:00:00,,71.9,19.0 +2019-05-23 07:00:00,,68.7,24.0 +2019-05-23 08:00:00,,79.6,26.0 +2019-05-23 09:00:00,,91.8,25.0 +2019-05-23 10:00:00,,97.0,23.0 +2019-05-23 11:00:00,,79.4,25.0 +2019-05-23 
12:00:00,,28.3,24.0 +2019-05-23 13:00:00,,17.0,25.0 +2019-05-23 14:00:00,,16.4,28.0 +2019-05-23 15:00:00,,21.2,34.0 +2019-05-23 16:00:00,,17.2,38.0 +2019-05-23 17:00:00,,17.5,53.0 +2019-05-23 18:00:00,,17.8,60.0 +2019-05-23 19:00:00,,22.7,54.0 +2019-05-23 20:00:00,,23.5,51.0 +2019-05-23 21:00:00,,28.0,45.0 +2019-05-23 22:00:00,,33.8,44.0 +2019-05-23 23:00:00,,47.0,39.0 +2019-05-24 00:00:00,,61.9,39.0 +2019-05-24 01:00:00,,23.2,31.0 +2019-05-24 02:00:00,,32.8, +2019-05-24 03:00:00,74.5,28.8,31.0 +2019-05-24 04:00:00,,28.4,31.0 +2019-05-24 05:00:00,,19.4,23.0 +2019-05-24 06:00:00,,28.1,23.0 +2019-05-24 07:00:00,,35.9,29.0 +2019-05-24 08:00:00,,40.7,28.0 +2019-05-24 09:00:00,,54.8,26.0 +2019-05-24 10:00:00,,45.9,24.0 +2019-05-24 11:00:00,,37.9,23.0 +2019-05-24 12:00:00,,28.6,26.0 +2019-05-24 13:00:00,,40.6,29.0 +2019-05-24 14:00:00,,29.3,33.0 +2019-05-24 15:00:00,,24.3,39.0 +2019-05-24 16:00:00,,20.5,40.0 +2019-05-24 17:00:00,,22.7,43.0 +2019-05-24 18:00:00,,27.3,46.0 +2019-05-24 19:00:00,,25.2,46.0 +2019-05-24 20:00:00,,23.3,44.0 +2019-05-24 21:00:00,,21.9,42.0 +2019-05-24 22:00:00,,31.7,38.0 +2019-05-24 23:00:00,,18.1,39.0 +2019-05-25 00:00:00,,18.0,39.0 +2019-05-25 01:00:00,,16.5,32.0 +2019-05-25 02:00:00,,17.4,32.0 +2019-05-25 03:00:00,29.0,12.8,25.0 +2019-05-25 04:00:00,,20.3,25.0 +2019-05-25 05:00:00,,,21.0 +2019-05-25 06:00:00,,,21.0 +2019-05-25 07:00:00,,,22.0 +2019-05-25 08:00:00,,36.9,22.0 +2019-05-25 09:00:00,,42.1,23.0 +2019-05-25 10:00:00,,44.5,23.0 +2019-05-25 11:00:00,,33.6,21.0 +2019-05-25 12:00:00,,26.3,23.0 +2019-05-25 13:00:00,,19.5,24.0 +2019-05-25 14:00:00,,18.6,26.0 +2019-05-25 15:00:00,,26.1,31.0 +2019-05-25 16:00:00,,23.6,37.0 +2019-05-25 17:00:00,,30.0,42.0 +2019-05-25 18:00:00,,31.9,46.0 +2019-05-25 19:00:00,,20.6,47.0 +2019-05-25 20:00:00,,30.4,47.0 +2019-05-25 21:00:00,,22.1,44.0 +2019-05-25 22:00:00,,43.6,41.0 +2019-05-25 23:00:00,,39.5,36.0 +2019-05-26 00:00:00,,63.9,36.0 +2019-05-26 01:00:00,,70.2,32.0 +2019-05-26 02:00:00,,67.0,32.0 +2019-05-26 03:00:00,53.0,49.8,26.0 +2019-05-26 04:00:00,,23.4,26.0 +2019-05-26 05:00:00,,22.9,20.0 +2019-05-26 06:00:00,,22.3,20.0 +2019-05-26 07:00:00,,16.8,17.0 +2019-05-26 08:00:00,,15.1,17.0 +2019-05-26 09:00:00,,13.4,15.0 +2019-05-26 10:00:00,,11.0,15.0 +2019-05-26 11:00:00,,10.3,16.0 +2019-05-26 12:00:00,,11.3,17.0 +2019-05-26 13:00:00,,13.3,21.0 +2019-05-26 14:00:00,,11.5,24.0 +2019-05-26 15:00:00,,12.5,25.0 +2019-05-26 16:00:00,,15.3,26.0 +2019-05-26 17:00:00,,11.7,27.0 +2019-05-26 18:00:00,,17.1,26.0 +2019-05-26 19:00:00,,17.3,28.0 +2019-05-26 20:00:00,,22.8,26.0 +2019-05-26 21:00:00,,17.8,25.0 +2019-05-26 22:00:00,,16.6,27.0 +2019-05-26 23:00:00,,16.1,26.0 +2019-05-27 00:00:00,,15.2,26.0 +2019-05-27 01:00:00,,10.3,26.0 +2019-05-27 02:00:00,,9.5,26.0 +2019-05-27 03:00:00,10.5,7.1,24.0 +2019-05-27 04:00:00,,5.9,24.0 +2019-05-27 05:00:00,,4.8,19.0 +2019-05-27 06:00:00,,6.5,19.0 +2019-05-27 07:00:00,,20.3,18.0 +2019-05-27 08:00:00,,29.1,18.0 +2019-05-27 09:00:00,,29.5,18.0 +2019-05-27 10:00:00,,34.2,18.0 +2019-05-27 11:00:00,,31.4,16.0 +2019-05-27 12:00:00,,23.3,17.0 +2019-05-27 13:00:00,,19.3,17.0 +2019-05-27 14:00:00,,17.3,20.0 +2019-05-27 15:00:00,,17.5,20.0 +2019-05-27 16:00:00,,17.3,22.0 +2019-05-27 17:00:00,,25.6,22.0 +2019-05-27 18:00:00,,23.6,22.0 +2019-05-27 19:00:00,,22.9,22.0 +2019-05-27 20:00:00,,25.6,22.0 +2019-05-27 21:00:00,,22.1,23.0 +2019-05-27 22:00:00,,22.3,20.0 +2019-05-27 23:00:00,,18.8,19.0 +2019-05-28 00:00:00,,19.9,19.0 +2019-05-28 01:00:00,,22.6,16.0 +2019-05-28 02:00:00,,15.4,16.0 +2019-05-28 
03:00:00,11.0,8.2,16.0 +2019-05-28 04:00:00,,6.4,16.0 +2019-05-28 05:00:00,,6.1,15.0 +2019-05-28 06:00:00,,8.9,15.0 +2019-05-28 07:00:00,,19.9,19.0 +2019-05-28 08:00:00,,28.8,20.0 +2019-05-28 09:00:00,,33.8,20.0 +2019-05-28 10:00:00,,31.2,20.0 +2019-05-28 11:00:00,,24.3,21.0 +2019-05-28 12:00:00,,21.6,21.0 +2019-05-28 13:00:00,,20.5,28.0 +2019-05-28 14:00:00,,24.8,27.0 +2019-05-28 15:00:00,,18.5,29.0 +2019-05-28 16:00:00,,18.8,30.0 +2019-05-28 17:00:00,,25.0,27.0 +2019-05-28 18:00:00,,26.5,25.0 +2019-05-28 19:00:00,,20.8,29.0 +2019-05-28 20:00:00,,16.2,29.0 +2019-05-28 21:00:00,,18.5,29.0 +2019-05-28 22:00:00,,20.4,31.0 +2019-05-28 23:00:00,,20.4, +2019-05-29 00:00:00,,20.2,25.0 +2019-05-29 01:00:00,,25.3,26.0 +2019-05-29 02:00:00,,23.4,26.0 +2019-05-29 03:00:00,21.0,21.6,23.0 +2019-05-29 04:00:00,,19.0,23.0 +2019-05-29 05:00:00,,20.3,21.0 +2019-05-29 06:00:00,,24.1,21.0 +2019-05-29 07:00:00,,36.7,24.0 +2019-05-29 08:00:00,,46.5,22.0 +2019-05-29 09:00:00,,50.5,21.0 +2019-05-29 10:00:00,,45.7,18.0 +2019-05-29 11:00:00,,34.5,18.0 +2019-05-29 12:00:00,,30.7,18.0 +2019-05-29 13:00:00,,22.0,20.0 +2019-05-29 14:00:00,,13.2,13.0 +2019-05-29 15:00:00,,17.8,15.0 +2019-05-29 16:00:00,,0.0,5.0 +2019-05-29 17:00:00,,0.0,3.0 +2019-05-29 18:00:00,,20.1,5.0 +2019-05-29 19:00:00,,22.9,5.0 +2019-05-29 20:00:00,,25.3,5.0 +2019-05-29 21:00:00,,24.1,6.0 +2019-05-29 22:00:00,,20.8,6.0 +2019-05-29 23:00:00,,16.9,5.0 +2019-05-30 00:00:00,,19.0,5.0 +2019-05-30 01:00:00,,19.9,1.0 +2019-05-30 02:00:00,,19.4,1.0 +2019-05-30 03:00:00,7.5,12.4,0.0 +2019-05-30 04:00:00,,9.4,0.0 +2019-05-30 05:00:00,,10.6,0.0 +2019-05-30 06:00:00,,10.4,0.0 +2019-05-30 07:00:00,,12.2,0.0 +2019-05-30 08:00:00,,13.3,2.0 +2019-05-30 09:00:00,,18.3,3.0 +2019-05-30 10:00:00,,16.7,5.0 +2019-05-30 11:00:00,,15.1,9.0 +2019-05-30 12:00:00,,13.8,13.0 +2019-05-30 13:00:00,,14.9,17.0 +2019-05-30 14:00:00,,14.2,20.0 +2019-05-30 15:00:00,,16.1,22.0 +2019-05-30 16:00:00,,14.9,22.0 +2019-05-30 17:00:00,,13.0,27.0 +2019-05-30 18:00:00,,12.8,30.0 +2019-05-30 19:00:00,,20.4,28.0 +2019-05-30 20:00:00,,22.1,28.0 +2019-05-30 21:00:00,,22.9,27.0 +2019-05-30 22:00:00,,21.9,27.0 +2019-05-30 23:00:00,,26.9,23.0 +2019-05-31 00:00:00,,27.0,23.0 +2019-05-31 01:00:00,,29.6,18.0 +2019-05-31 02:00:00,,27.2,18.0 +2019-05-31 03:00:00,9.0,36.9,12.0 +2019-05-31 04:00:00,,44.1,12.0 +2019-05-31 05:00:00,,40.1,9.0 +2019-05-31 06:00:00,,31.1,9.0 +2019-05-31 07:00:00,,37.2,8.0 +2019-05-31 08:00:00,,38.6,9.0 +2019-05-31 09:00:00,,47.4,8.0 +2019-05-31 10:00:00,,36.6,37.0 +2019-05-31 11:00:00,,19.6,15.0 +2019-05-31 12:00:00,,17.2,16.0 +2019-05-31 13:00:00,,15.1,18.0 +2019-05-31 14:00:00,,13.3,21.0 +2019-05-31 15:00:00,,13.8,21.0 +2019-05-31 16:00:00,,15.4,24.0 +2019-05-31 17:00:00,,15.4,26.0 +2019-05-31 18:00:00,,16.3,26.0 +2019-05-31 19:00:00,,20.5,29.0 +2019-05-31 20:00:00,,25.2,33.0 +2019-05-31 21:00:00,,23.3,33.0 +2019-05-31 22:00:00,,37.0,31.0 +2019-05-31 23:00:00,,60.2,26.0 +2019-06-01 00:00:00,,68.0,26.0 +2019-06-01 01:00:00,,81.7,22.0 +2019-06-01 02:00:00,,84.7,22.0 +2019-06-01 03:00:00,52.5,74.8,16.0 +2019-06-01 04:00:00,,68.1,16.0 +2019-06-01 05:00:00,,,11.0 +2019-06-01 06:00:00,,,11.0 +2019-06-01 07:00:00,,,4.0 +2019-06-01 08:00:00,,44.6,2.0 +2019-06-01 09:00:00,,46.4,8.0 +2019-06-01 10:00:00,,33.3,9.0 +2019-06-01 11:00:00,,23.9,12.0 +2019-06-01 12:00:00,,13.8,19.0 +2019-06-01 13:00:00,,12.2,28.0 +2019-06-01 14:00:00,,10.4,33.0 +2019-06-01 15:00:00,,10.2,36.0 +2019-06-01 16:00:00,,10.0,33.0 +2019-06-01 17:00:00,,10.2,31.0 +2019-06-01 18:00:00,,11.8,32.0 +2019-06-01 
19:00:00,,11.8,36.0 +2019-06-01 20:00:00,,14.5,38.0 +2019-06-01 21:00:00,,24.6,41.0 +2019-06-01 22:00:00,,43.6,44.0 +2019-06-01 23:00:00,,49.4,52.0 +2019-06-02 00:00:00,,48.1,52.0 +2019-06-02 01:00:00,,32.7,44.0 +2019-06-02 02:00:00,,38.1,44.0 +2019-06-02 03:00:00,,38.2,43.0 +2019-06-02 04:00:00,,39.2,43.0 +2019-06-02 05:00:00,,23.2,37.0 +2019-06-02 06:00:00,,24.5,37.0 +2019-06-02 07:00:00,,37.2,32.0 +2019-06-02 08:00:00,,24.1,32.0 +2019-06-02 09:00:00,,18.1,30.0 +2019-06-02 10:00:00,,19.5,32.0 +2019-06-02 11:00:00,,21.0,35.0 +2019-06-02 12:00:00,,18.1,36.0 +2019-06-02 13:00:00,,13.1,35.0 +2019-06-02 14:00:00,,11.5,34.0 +2019-06-02 15:00:00,,13.0,36.0 +2019-06-02 16:00:00,,15.0,33.0 +2019-06-02 17:00:00,,13.9,32.0 +2019-06-02 18:00:00,,14.4,32.0 +2019-06-02 19:00:00,,14.4,34.0 +2019-06-02 20:00:00,,15.6,34.0 +2019-06-02 21:00:00,,25.8,32.0 +2019-06-02 22:00:00,,40.9,28.0 +2019-06-02 23:00:00,,36.9,27.0 +2019-06-03 00:00:00,,27.6,27.0 +2019-06-03 01:00:00,,17.9,21.0 +2019-06-03 02:00:00,,15.7,21.0 +2019-06-03 03:00:00,,11.8,11.0 +2019-06-03 04:00:00,,11.7,11.0 +2019-06-03 05:00:00,,9.8,3.0 +2019-06-03 06:00:00,,11.4,3.0 +2019-06-03 07:00:00,,29.0,5.0 +2019-06-03 08:00:00,,44.1,6.0 +2019-06-03 09:00:00,,50.0,7.0 +2019-06-03 10:00:00,,43.9,5.0 +2019-06-03 11:00:00,,46.0,11.0 +2019-06-03 12:00:00,,31.7,16.0 +2019-06-03 13:00:00,,27.5,14.0 +2019-06-03 14:00:00,,22.1,15.0 +2019-06-03 15:00:00,,25.8,17.0 +2019-06-03 16:00:00,,23.2,21.0 +2019-06-03 17:00:00,,24.8,22.0 +2019-06-03 18:00:00,,25.3,24.0 +2019-06-03 19:00:00,,24.4,24.0 +2019-06-03 20:00:00,,23.1,23.0 +2019-06-03 21:00:00,,28.9,20.0 +2019-06-03 22:00:00,,33.0,20.0 +2019-06-03 23:00:00,,31.1,17.0 +2019-06-04 00:00:00,,30.5,17.0 +2019-06-04 01:00:00,,44.6,12.0 +2019-06-04 02:00:00,,52.4,12.0 +2019-06-04 03:00:00,,43.9,8.0 +2019-06-04 04:00:00,,35.0,8.0 +2019-06-04 05:00:00,,41.6,5.0 +2019-06-04 06:00:00,,28.8,5.0 +2019-06-04 07:00:00,,36.5,14.0 +2019-06-04 08:00:00,,47.7,18.0 +2019-06-04 09:00:00,,53.5,22.0 +2019-06-04 10:00:00,,50.8,35.0 +2019-06-04 11:00:00,,38.5,31.0 +2019-06-04 12:00:00,,23.3,32.0 +2019-06-04 13:00:00,,19.6,35.0 +2019-06-04 14:00:00,,17.7,37.0 +2019-06-04 15:00:00,,17.4,36.0 +2019-06-04 16:00:00,,18.1,38.0 +2019-06-04 17:00:00,,21.5,38.0 +2019-06-04 18:00:00,,26.3,40.0 +2019-06-04 19:00:00,,23.4,29.0 +2019-06-04 20:00:00,,25.2,20.0 +2019-06-04 21:00:00,,17.0,18.0 +2019-06-04 22:00:00,,16.9,17.0 +2019-06-04 23:00:00,,26.3,17.0 +2019-06-05 00:00:00,,33.5,17.0 +2019-06-05 01:00:00,,17.8,13.0 +2019-06-05 02:00:00,,15.7,13.0 +2019-06-05 03:00:00,15.0,10.8,4.0 +2019-06-05 04:00:00,,12.4,4.0 +2019-06-05 05:00:00,,16.2,6.0 +2019-06-05 06:00:00,,24.5,6.0 +2019-06-05 07:00:00,,39.2,2.0 +2019-06-05 08:00:00,,35.8,1.0 +2019-06-05 09:00:00,,36.9,0.0 +2019-06-05 10:00:00,,35.3,0.0 +2019-06-05 11:00:00,,36.8,5.0 +2019-06-05 12:00:00,,42.1,7.0 +2019-06-05 13:00:00,,59.0,9.0 +2019-06-05 14:00:00,,47.2,14.0 +2019-06-05 15:00:00,,33.6,20.0 +2019-06-05 16:00:00,,38.3,20.0 +2019-06-05 17:00:00,,53.5,19.0 +2019-06-05 18:00:00,,37.9,19.0 +2019-06-05 19:00:00,,48.8,19.0 +2019-06-05 20:00:00,,40.8,19.0 +2019-06-05 21:00:00,,37.8,19.0 +2019-06-05 22:00:00,,37.5,19.0 +2019-06-05 23:00:00,,33.7,17.0 +2019-06-06 00:00:00,,30.3,17.0 +2019-06-06 01:00:00,,31.8,8.0 +2019-06-06 02:00:00,,23.8, +2019-06-06 03:00:00,,18.0,4.0 +2019-06-06 04:00:00,,15.2,4.0 +2019-06-06 05:00:00,,19.2,0.0 +2019-06-06 06:00:00,,28.4,0.0 +2019-06-06 07:00:00,,40.3,1.0 +2019-06-06 08:00:00,,40.5,3.0 +2019-06-06 09:00:00,,43.1,0.0 +2019-06-06 10:00:00,,36.0,1.0 +2019-06-06 
11:00:00,,26.0,7.0 +2019-06-06 12:00:00,,21.2,7.0 +2019-06-06 13:00:00,,16.4,12.0 +2019-06-06 14:00:00,,16.5,10.0 +2019-06-06 15:00:00,,16.0,11.0 +2019-06-06 16:00:00,,15.1,16.0 +2019-06-06 17:00:00,,,22.0 +2019-06-06 18:00:00,,,24.0 +2019-06-06 19:00:00,,,24.0 +2019-06-06 20:00:00,,,24.0 +2019-06-06 21:00:00,,,22.0 +2019-06-06 22:00:00,,,24.0 +2019-06-06 23:00:00,,,21.0 +2019-06-07 00:00:00,,,21.0 +2019-06-07 01:00:00,,,23.0 +2019-06-07 02:00:00,,,23.0 +2019-06-07 03:00:00,,,27.0 +2019-06-07 04:00:00,,,27.0 +2019-06-07 05:00:00,,,23.0 +2019-06-07 06:00:00,,,23.0 +2019-06-07 07:00:00,,,25.0 +2019-06-07 08:00:00,,28.9,23.0 +2019-06-07 09:00:00,,23.0,24.0 +2019-06-07 10:00:00,,29.3,25.0 +2019-06-07 11:00:00,,34.5,23.0 +2019-06-07 12:00:00,,32.1,25.0 +2019-06-07 13:00:00,,26.7,27.0 +2019-06-07 14:00:00,,17.8,20.0 +2019-06-07 15:00:00,,15.0,15.0 +2019-06-07 16:00:00,,13.1,15.0 +2019-06-07 17:00:00,,15.6,21.0 +2019-06-07 18:00:00,,19.5,24.0 +2019-06-07 19:00:00,,19.5,27.0 +2019-06-07 20:00:00,,19.1,35.0 +2019-06-07 21:00:00,,19.9,36.0 +2019-06-07 22:00:00,,19.4,35.0 +2019-06-07 23:00:00,,16.3, +2019-06-08 00:00:00,,14.7,33.0 +2019-06-08 01:00:00,,14.4,28.0 +2019-06-08 02:00:00,,11.3, +2019-06-08 03:00:00,,9.6,7.0 +2019-06-08 04:00:00,,8.4,7.0 +2019-06-08 05:00:00,,9.8,3.0 +2019-06-08 06:00:00,,10.7,3.0 +2019-06-08 07:00:00,,14.1,2.0 +2019-06-08 08:00:00,,13.8,3.0 +2019-06-08 09:00:00,,14.0,4.0 +2019-06-08 10:00:00,,13.0,2.0 +2019-06-08 11:00:00,,11.7,3.0 +2019-06-08 12:00:00,,10.3,4.0 +2019-06-08 13:00:00,,10.4,8.0 +2019-06-08 14:00:00,,9.2,10.0 +2019-06-08 15:00:00,,11.1,13.0 +2019-06-08 16:00:00,,10.3,17.0 +2019-06-08 17:00:00,,11.7,19.0 +2019-06-08 18:00:00,,14.1,20.0 +2019-06-08 19:00:00,,14.8,20.0 +2019-06-08 20:00:00,,22.0,19.0 +2019-06-08 21:00:00,,,17.0 +2019-06-08 22:00:00,,,16.0 +2019-06-08 23:00:00,,36.7, +2019-06-09 00:00:00,,34.8,20.0 +2019-06-09 01:00:00,,47.0,10.0 +2019-06-09 02:00:00,,55.9,10.0 +2019-06-09 03:00:00,10.0,41.0,7.0 +2019-06-09 04:00:00,,51.2,7.0 +2019-06-09 05:00:00,,51.5,1.0 +2019-06-09 06:00:00,,43.0,1.0 +2019-06-09 07:00:00,,42.2,5.0 +2019-06-09 08:00:00,,36.7,1.0 +2019-06-09 09:00:00,,32.7,0.0 +2019-06-09 10:00:00,,30.2,0.0 +2019-06-09 11:00:00,,25.0,2.0 +2019-06-09 12:00:00,,16.6,5.0 +2019-06-09 13:00:00,,14.6,8.0 +2019-06-09 14:00:00,,14.6,13.0 +2019-06-09 15:00:00,,10.2,17.0 +2019-06-09 16:00:00,,7.9,19.0 +2019-06-09 17:00:00,,7.2,24.0 +2019-06-09 18:00:00,,10.3,26.0 +2019-06-09 19:00:00,,13.0,20.0 +2019-06-09 20:00:00,,19.5,21.0 +2019-06-09 21:00:00,,30.6,21.0 +2019-06-09 22:00:00,,33.2,22.0 +2019-06-09 23:00:00,,30.9, +2019-06-10 00:00:00,,37.1,24.0 +2019-06-10 01:00:00,,39.9,21.0 +2019-06-10 02:00:00,,28.1,21.0 +2019-06-10 03:00:00,18.5,19.3,25.0 +2019-06-10 04:00:00,,17.8,25.0 +2019-06-10 05:00:00,,18.0,24.0 +2019-06-10 06:00:00,,13.7,24.0 +2019-06-10 07:00:00,,21.3,24.0 +2019-06-10 08:00:00,,26.7,22.0 +2019-06-10 09:00:00,,23.0,27.0 +2019-06-10 10:00:00,,16.9,34.0 +2019-06-10 11:00:00,,18.5,45.0 +2019-06-10 12:00:00,,14.1,41.0 +2019-06-10 13:00:00,,12.2,45.0 +2019-06-10 14:00:00,,11.7,51.0 +2019-06-10 15:00:00,,9.6,40.0 +2019-06-10 16:00:00,,9.5,40.0 +2019-06-10 17:00:00,,11.7,31.0 +2019-06-10 18:00:00,,15.1,28.0 +2019-06-10 19:00:00,,19.1,26.0 +2019-06-10 20:00:00,,18.4,25.0 +2019-06-10 21:00:00,,22.3,26.0 +2019-06-10 22:00:00,,22.6,24.0 +2019-06-10 23:00:00,,23.5,23.0 +2019-06-11 00:00:00,,24.8,23.0 +2019-06-11 01:00:00,,24.1,15.0 +2019-06-11 02:00:00,,19.6,15.0 +2019-06-11 03:00:00,7.5,19.1,16.0 +2019-06-11 04:00:00,,29.6,16.0 +2019-06-11 
05:00:00,,32.3,13.0 +2019-06-11 06:00:00,,52.7,13.0 +2019-06-11 07:00:00,,58.7,17.0 +2019-06-11 08:00:00,,55.4,18.0 +2019-06-11 09:00:00,,58.0,21.0 +2019-06-11 10:00:00,,43.6,23.0 +2019-06-11 11:00:00,,31.7,22.0 +2019-06-11 12:00:00,,22.1,22.0 +2019-06-11 13:00:00,,17.3,23.0 +2019-06-11 14:00:00,,12.6,26.0 +2019-06-11 15:00:00,,13.1,35.0 +2019-06-11 16:00:00,,16.6,31.0 +2019-06-11 17:00:00,,19.8,31.0 +2019-06-11 18:00:00,,22.6,30.0 +2019-06-11 19:00:00,,35.5,31.0 +2019-06-11 20:00:00,,44.6,30.0 +2019-06-11 21:00:00,,36.1,22.0 +2019-06-11 22:00:00,,42.7,22.0 +2019-06-11 23:00:00,,54.1,20.0 +2019-06-12 00:00:00,,59.4,20.0 +2019-06-12 01:00:00,,41.5,15.0 +2019-06-12 02:00:00,,37.2, +2019-06-12 03:00:00,21.0,41.9, +2019-06-12 04:00:00,,34.7,11.0 +2019-06-12 05:00:00,,36.3,9.0 +2019-06-12 06:00:00,,44.9,9.0 +2019-06-12 07:00:00,,42.7,12.0 +2019-06-12 08:00:00,,38.4,17.0 +2019-06-12 09:00:00,,44.4,20.0 +2019-06-12 10:00:00,,35.5,22.0 +2019-06-12 11:00:00,,26.7,25.0 +2019-06-12 12:00:00,,0.0,35.0 +2019-06-12 13:00:00,,0.0,33.0 +2019-06-12 14:00:00,,15.4,33.0 +2019-06-12 15:00:00,,17.9,35.0 +2019-06-12 16:00:00,,20.3,42.0 +2019-06-12 17:00:00,,16.8,45.0 +2019-06-12 18:00:00,,23.6,43.0 +2019-06-12 19:00:00,,24.2,45.0 +2019-06-12 20:00:00,,25.3,33.0 +2019-06-12 21:00:00,,23.4,41.0 +2019-06-12 22:00:00,,29.2,43.0 +2019-06-12 23:00:00,,29.3, +2019-06-13 00:00:00,,25.6,35.0 +2019-06-13 01:00:00,,26.9,29.0 +2019-06-13 02:00:00,,20.0, +2019-06-13 03:00:00,28.5,18.7,26.0 +2019-06-13 04:00:00,,18.0,26.0 +2019-06-13 05:00:00,,18.8,16.0 +2019-06-13 06:00:00,,24.6,16.0 +2019-06-13 07:00:00,,37.0,19.0 +2019-06-13 08:00:00,,39.8,21.0 +2019-06-13 09:00:00,,40.9,19.0 +2019-06-13 10:00:00,,35.3,16.0 +2019-06-13 11:00:00,,30.2,18.0 +2019-06-13 12:00:00,,24.5,19.0 +2019-06-13 13:00:00,,22.7,19.0 +2019-06-13 14:00:00,,17.9,16.0 +2019-06-13 15:00:00,,18.2,15.0 +2019-06-13 16:00:00,,19.4,13.0 +2019-06-13 17:00:00,,28.8,11.0 +2019-06-13 18:00:00,,36.1,15.0 +2019-06-13 19:00:00,,38.2,14.0 +2019-06-13 20:00:00,,24.0,13.0 +2019-06-13 21:00:00,,27.5,14.0 +2019-06-13 22:00:00,,31.5,15.0 +2019-06-13 23:00:00,,58.8,15.0 +2019-06-14 00:00:00,,77.9,15.0 +2019-06-14 01:00:00,,78.3,13.0 +2019-06-14 02:00:00,,74.2, +2019-06-14 03:00:00,,68.1,8.0 +2019-06-14 04:00:00,,66.6,8.0 +2019-06-14 05:00:00,,48.5,6.0 +2019-06-14 06:00:00,,37.9,6.0 +2019-06-14 07:00:00,,49.3,13.0 +2019-06-14 08:00:00,,64.3,11.0 +2019-06-14 09:00:00,,51.5,11.0 +2019-06-14 10:00:00,,34.3,14.0 +2019-06-14 11:00:00,36.5,27.9,13.0 +2019-06-14 12:00:00,,25.1,13.0 +2019-06-14 13:00:00,,21.8,15.0 +2019-06-14 14:00:00,,17.1,16.0 +2019-06-14 15:00:00,,15.4,22.0 +2019-06-14 16:00:00,,14.2,25.0 +2019-06-14 17:00:00,,15.2,25.0 +2019-06-14 18:00:00,,18.9,26.0 +2019-06-14 19:00:00,,16.6,27.0 +2019-06-14 20:00:00,,19.0,26.0 +2019-06-14 21:00:00,,25.0,26.0 +2019-06-14 22:00:00,,41.9,25.0 +2019-06-14 23:00:00,,55.0,26.0 +2019-06-15 00:00:00,,35.3,26.0 +2019-06-15 01:00:00,,32.1,26.0 +2019-06-15 02:00:00,,29.6, +2019-06-15 03:00:00,17.5,29.0, +2019-06-15 04:00:00,,33.9, +2019-06-15 05:00:00,,,10.0 +2019-06-15 06:00:00,,,10.0 +2019-06-15 07:00:00,,,13.0 +2019-06-15 08:00:00,,35.8,13.0 +2019-06-15 09:00:00,,24.1,8.0 +2019-06-15 10:00:00,,17.6,8.0 +2019-06-15 11:00:00,,14.0,12.0 +2019-06-15 12:00:00,,12.1,14.0 +2019-06-15 13:00:00,,11.1,13.0 +2019-06-15 14:00:00,,9.4,18.0 +2019-06-15 15:00:00,,9.0,17.0 +2019-06-15 16:00:00,,9.6,18.0 +2019-06-15 17:00:00,,10.5,18.0 +2019-06-15 18:00:00,,10.7,20.0 +2019-06-15 19:00:00,,11.1,22.0 +2019-06-15 20:00:00,,14.0,22.0 +2019-06-15 
21:00:00,,14.2,21.0 +2019-06-15 22:00:00,,15.2,20.0 +2019-06-15 23:00:00,,17.2,19.0 +2019-06-16 00:00:00,,20.1,19.0 +2019-06-16 01:00:00,,22.6,15.0 +2019-06-16 02:00:00,,16.5,15.0 +2019-06-16 03:00:00,42.5,12.8,12.0 +2019-06-16 04:00:00,,11.4,12.0 +2019-06-16 05:00:00,,11.2,10.0 +2019-06-16 06:00:00,,11.7,10.0 +2019-06-16 07:00:00,,14.0,8.0 +2019-06-16 08:00:00,,11.6,5.0 +2019-06-16 09:00:00,,10.2,4.0 +2019-06-16 10:00:00,,9.9,5.0 +2019-06-16 11:00:00,,9.4,6.0 +2019-06-16 12:00:00,,8.7,6.0 +2019-06-16 13:00:00,,12.9,10.0 +2019-06-16 14:00:00,,11.2,16.0 +2019-06-16 15:00:00,,8.7,23.0 +2019-06-16 16:00:00,,8.1,26.0 +2019-06-16 17:00:00,,8.4,29.0 +2019-06-16 18:00:00,,9.2,29.0 +2019-06-16 19:00:00,,11.8,28.0 +2019-06-16 20:00:00,,12.3,28.0 +2019-06-16 21:00:00,,14.4,27.0 +2019-06-16 22:00:00,,23.3,25.0 +2019-06-16 23:00:00,,42.7, +2019-06-17 00:00:00,,56.6,23.0 +2019-06-17 01:00:00,,67.3,17.0 +2019-06-17 02:00:00,,69.3,17.0 +2019-06-17 03:00:00,42.0,58.8,14.0 +2019-06-17 04:00:00,35.5,53.1,14.0 +2019-06-17 05:00:00,36.0,49.1,11.0 +2019-06-17 06:00:00,39.5,45.7,11.0 +2019-06-17 07:00:00,42.5,44.8,12.0 +2019-06-17 08:00:00,43.5,52.3,13.0 +2019-06-17 09:00:00,45.0,54.4,13.0 +2019-06-17 10:00:00,41.0,51.6,11.0 +2019-06-17 11:00:00,,30.4,11.0 +2019-06-17 12:00:00,,16.0,11.0 +2019-06-17 13:00:00,,15.2, +2019-06-17 14:00:00,,10.1, +2019-06-17 15:00:00,,9.6, +2019-06-17 16:00:00,,11.5, +2019-06-17 17:00:00,,13.1, +2019-06-17 18:00:00,,11.9, +2019-06-17 19:00:00,,14.9, +2019-06-17 20:00:00,,15.4, +2019-06-17 21:00:00,,15.2, +2019-06-17 22:00:00,,20.5, +2019-06-17 23:00:00,,38.3, +2019-06-18 00:00:00,,51.0, +2019-06-18 01:00:00,,73.3, +2019-06-18 02:00:00,,66.2, +2019-06-18 03:00:00,,60.1, +2019-06-18 04:00:00,,39.8, +2019-06-18 05:00:00,,45.5, +2019-06-18 06:00:00,,26.5, +2019-06-18 07:00:00,,33.8, +2019-06-18 08:00:00,,51.4, +2019-06-18 09:00:00,,52.6, +2019-06-18 10:00:00,,49.6, +2019-06-18 21:00:00,,15.3, +2019-06-18 22:00:00,,17.0, +2019-06-18 23:00:00,,23.1, +2019-06-19 00:00:00,,39.3, +2019-06-19 11:00:00,,27.3, +2019-06-19 12:00:00,,26.6, +2019-06-20 15:00:00,,19.4, +2019-06-20 16:00:00,,20.1, +2019-06-20 17:00:00,,19.3, +2019-06-20 18:00:00,,19.0, +2019-06-20 19:00:00,,23.2, +2019-06-20 20:00:00,,23.9, +2019-06-20 21:00:00,,25.3, +2019-06-20 22:00:00,,21.4, +2019-06-20 23:00:00,,24.9, +2019-06-21 00:00:00,,26.5, +2019-06-21 01:00:00,,21.8, +2019-06-21 02:00:00,,20.0, diff --git a/doc/data/air_quality_no2_long.csv b/doc/data/air_quality_no2_long.csv new file mode 100644 index 0000000000000..5d959370b7d48 --- /dev/null +++ b/doc/data/air_quality_no2_long.csv @@ -0,0 +1,2069 @@ +city,country,date.utc,location,parameter,value,unit +Paris,FR,2019-06-21 00:00:00+00:00,FR04014,no2,20.0,µg/m³ +Paris,FR,2019-06-20 23:00:00+00:00,FR04014,no2,21.8,µg/m³ +Paris,FR,2019-06-20 22:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-06-20 21:00:00+00:00,FR04014,no2,24.9,µg/m³ +Paris,FR,2019-06-20 20:00:00+00:00,FR04014,no2,21.4,µg/m³ +Paris,FR,2019-06-20 19:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-06-20 18:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-06-20 17:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-06-20 16:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-06-20 15:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-06-20 14:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-06-20 13:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-06-19 10:00:00+00:00,FR04014,no2,26.6,µg/m³ +Paris,FR,2019-06-19 09:00:00+00:00,FR04014,no2,27.3,µg/m³ +Paris,FR,2019-06-18 
22:00:00+00:00,FR04014,no2,39.3,µg/m³ +Paris,FR,2019-06-18 21:00:00+00:00,FR04014,no2,23.1,µg/m³ +Paris,FR,2019-06-18 20:00:00+00:00,FR04014,no2,17.0,µg/m³ +Paris,FR,2019-06-18 19:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-06-18 08:00:00+00:00,FR04014,no2,49.6,µg/m³ +Paris,FR,2019-06-18 07:00:00+00:00,FR04014,no2,52.6,µg/m³ +Paris,FR,2019-06-18 06:00:00+00:00,FR04014,no2,51.4,µg/m³ +Paris,FR,2019-06-18 05:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-06-18 04:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-06-18 03:00:00+00:00,FR04014,no2,45.5,µg/m³ +Paris,FR,2019-06-18 02:00:00+00:00,FR04014,no2,39.8,µg/m³ +Paris,FR,2019-06-18 01:00:00+00:00,FR04014,no2,60.1,µg/m³ +Paris,FR,2019-06-18 00:00:00+00:00,FR04014,no2,66.2,µg/m³ +Paris,FR,2019-06-17 23:00:00+00:00,FR04014,no2,73.3,µg/m³ +Paris,FR,2019-06-17 22:00:00+00:00,FR04014,no2,51.0,µg/m³ +Paris,FR,2019-06-17 21:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-06-17 20:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-06-17 19:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-17 18:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-06-17 17:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-06-17 16:00:00+00:00,FR04014,no2,11.9,µg/m³ +Paris,FR,2019-06-17 15:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-17 14:00:00+00:00,FR04014,no2,11.5,µg/m³ +Paris,FR,2019-06-17 13:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-17 12:00:00+00:00,FR04014,no2,10.1,µg/m³ +Paris,FR,2019-06-17 11:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-17 10:00:00+00:00,FR04014,no2,16.0,µg/m³ +Paris,FR,2019-06-17 09:00:00+00:00,FR04014,no2,30.4,µg/m³ +Paris,FR,2019-06-17 08:00:00+00:00,FR04014,no2,51.6,µg/m³ +Paris,FR,2019-06-17 07:00:00+00:00,FR04014,no2,54.4,µg/m³ +Paris,FR,2019-06-17 06:00:00+00:00,FR04014,no2,52.3,µg/m³ +Paris,FR,2019-06-17 05:00:00+00:00,FR04014,no2,44.8,µg/m³ +Paris,FR,2019-06-17 04:00:00+00:00,FR04014,no2,45.7,µg/m³ +Paris,FR,2019-06-17 03:00:00+00:00,FR04014,no2,49.1,µg/m³ +Paris,FR,2019-06-17 02:00:00+00:00,FR04014,no2,53.1,µg/m³ +Paris,FR,2019-06-17 01:00:00+00:00,FR04014,no2,58.8,µg/m³ +Paris,FR,2019-06-17 00:00:00+00:00,FR04014,no2,69.3,µg/m³ +Paris,FR,2019-06-16 23:00:00+00:00,FR04014,no2,67.3,µg/m³ +Paris,FR,2019-06-16 22:00:00+00:00,FR04014,no2,56.6,µg/m³ +Paris,FR,2019-06-16 21:00:00+00:00,FR04014,no2,42.7,µg/m³ +Paris,FR,2019-06-16 20:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-06-16 19:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-16 18:00:00+00:00,FR04014,no2,12.3,µg/m³ +Paris,FR,2019-06-16 17:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-16 16:00:00+00:00,FR04014,no2,9.2,µg/m³ +Paris,FR,2019-06-16 15:00:00+00:00,FR04014,no2,8.4,µg/m³ +Paris,FR,2019-06-16 14:00:00+00:00,FR04014,no2,8.1,µg/m³ +Paris,FR,2019-06-16 13:00:00+00:00,FR04014,no2,8.7,µg/m³ +Paris,FR,2019-06-16 12:00:00+00:00,FR04014,no2,11.2,µg/m³ +Paris,FR,2019-06-16 11:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-06-16 10:00:00+00:00,FR04014,no2,8.7,µg/m³ +Paris,FR,2019-06-16 09:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-06-16 08:00:00+00:00,FR04014,no2,9.9,µg/m³ +Paris,FR,2019-06-16 07:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-16 06:00:00+00:00,FR04014,no2,11.6,µg/m³ +Paris,FR,2019-06-16 05:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-16 04:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-16 03:00:00+00:00,FR04014,no2,11.2,µg/m³ +Paris,FR,2019-06-16 02:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-06-16 01:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-06-16 
00:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-06-15 23:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-06-15 22:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-06-15 21:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-06-15 20:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-15 19:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-06-15 18:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-15 17:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-06-15 16:00:00+00:00,FR04014,no2,10.7,µg/m³ +Paris,FR,2019-06-15 15:00:00+00:00,FR04014,no2,10.5,µg/m³ +Paris,FR,2019-06-15 14:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-15 13:00:00+00:00,FR04014,no2,9.0,µg/m³ +Paris,FR,2019-06-15 12:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-06-15 11:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-06-15 10:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-06-15 09:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-15 08:00:00+00:00,FR04014,no2,17.6,µg/m³ +Paris,FR,2019-06-15 07:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-06-15 06:00:00+00:00,FR04014,no2,35.8,µg/m³ +Paris,FR,2019-06-15 02:00:00+00:00,FR04014,no2,33.9,µg/m³ +Paris,FR,2019-06-15 01:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-06-15 00:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-06-14 23:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-06-14 22:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-06-14 21:00:00+00:00,FR04014,no2,55.0,µg/m³ +Paris,FR,2019-06-14 20:00:00+00:00,FR04014,no2,41.9,µg/m³ +Paris,FR,2019-06-14 19:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-06-14 18:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-06-14 17:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-06-14 16:00:00+00:00,FR04014,no2,18.9,µg/m³ +Paris,FR,2019-06-14 15:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-14 14:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-06-14 13:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-06-14 12:00:00+00:00,FR04014,no2,17.1,µg/m³ +Paris,FR,2019-06-14 11:00:00+00:00,FR04014,no2,21.8,µg/m³ +Paris,FR,2019-06-14 10:00:00+00:00,FR04014,no2,25.1,µg/m³ +Paris,FR,2019-06-14 09:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-06-14 08:00:00+00:00,FR04014,no2,34.3,µg/m³ +Paris,FR,2019-06-14 07:00:00+00:00,FR04014,no2,51.5,µg/m³ +Paris,FR,2019-06-14 06:00:00+00:00,FR04014,no2,64.3,µg/m³ +Paris,FR,2019-06-14 05:00:00+00:00,FR04014,no2,49.3,µg/m³ +Paris,FR,2019-06-14 04:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-06-14 03:00:00+00:00,FR04014,no2,48.5,µg/m³ +Paris,FR,2019-06-14 02:00:00+00:00,FR04014,no2,66.6,µg/m³ +Paris,FR,2019-06-14 01:00:00+00:00,FR04014,no2,68.1,µg/m³ +Paris,FR,2019-06-14 00:00:00+00:00,FR04014,no2,74.2,µg/m³ +Paris,FR,2019-06-13 23:00:00+00:00,FR04014,no2,78.3,µg/m³ +Paris,FR,2019-06-13 22:00:00+00:00,FR04014,no2,77.9,µg/m³ +Paris,FR,2019-06-13 21:00:00+00:00,FR04014,no2,58.8,µg/m³ +Paris,FR,2019-06-13 20:00:00+00:00,FR04014,no2,31.5,µg/m³ +Paris,FR,2019-06-13 19:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-06-13 18:00:00+00:00,FR04014,no2,24.0,µg/m³ +Paris,FR,2019-06-13 17:00:00+00:00,FR04014,no2,38.2,µg/m³ +Paris,FR,2019-06-13 16:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-06-13 15:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-06-13 14:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-06-13 13:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-06-13 12:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-06-13 11:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-06-13 10:00:00+00:00,FR04014,no2,24.5,µg/m³ +Paris,FR,2019-06-13 
09:00:00+00:00,FR04014,no2,30.2,µg/m³ +Paris,FR,2019-06-13 08:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-06-13 07:00:00+00:00,FR04014,no2,40.9,µg/m³ +Paris,FR,2019-06-13 06:00:00+00:00,FR04014,no2,39.8,µg/m³ +Paris,FR,2019-06-13 05:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-06-13 04:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-06-13 03:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-06-13 02:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-06-13 01:00:00+00:00,FR04014,no2,18.7,µg/m³ +Paris,FR,2019-06-13 00:00:00+00:00,FR04014,no2,20.0,µg/m³ +Paris,FR,2019-06-12 23:00:00+00:00,FR04014,no2,26.9,µg/m³ +Paris,FR,2019-06-12 22:00:00+00:00,FR04014,no2,25.6,µg/m³ +Paris,FR,2019-06-12 21:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-06-12 20:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-06-12 19:00:00+00:00,FR04014,no2,23.4,µg/m³ +Paris,FR,2019-06-12 18:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-06-12 17:00:00+00:00,FR04014,no2,24.2,µg/m³ +Paris,FR,2019-06-12 16:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-06-12 15:00:00+00:00,FR04014,no2,16.8,µg/m³ +Paris,FR,2019-06-12 14:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-06-12 13:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-06-12 12:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-06-12 11:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-06-12 10:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-06-12 09:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-06-12 08:00:00+00:00,FR04014,no2,35.5,µg/m³ +Paris,FR,2019-06-12 07:00:00+00:00,FR04014,no2,44.4,µg/m³ +Paris,FR,2019-06-12 06:00:00+00:00,FR04014,no2,38.4,µg/m³ +Paris,FR,2019-06-12 05:00:00+00:00,FR04014,no2,42.7,µg/m³ +Paris,FR,2019-06-12 04:00:00+00:00,FR04014,no2,44.9,µg/m³ +Paris,FR,2019-06-12 03:00:00+00:00,FR04014,no2,36.3,µg/m³ +Paris,FR,2019-06-12 02:00:00+00:00,FR04014,no2,34.7,µg/m³ +Paris,FR,2019-06-12 01:00:00+00:00,FR04014,no2,41.9,µg/m³ +Paris,FR,2019-06-12 00:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-06-11 23:00:00+00:00,FR04014,no2,41.5,µg/m³ +Paris,FR,2019-06-11 22:00:00+00:00,FR04014,no2,59.4,µg/m³ +Paris,FR,2019-06-11 21:00:00+00:00,FR04014,no2,54.1,µg/m³ +Paris,FR,2019-06-11 20:00:00+00:00,FR04014,no2,42.7,µg/m³ +Paris,FR,2019-06-11 19:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-06-11 18:00:00+00:00,FR04014,no2,44.6,µg/m³ +Paris,FR,2019-06-11 17:00:00+00:00,FR04014,no2,35.5,µg/m³ +Paris,FR,2019-06-11 16:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-06-11 15:00:00+00:00,FR04014,no2,19.8,µg/m³ +Paris,FR,2019-06-11 14:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-06-11 13:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-11 12:00:00+00:00,FR04014,no2,12.6,µg/m³ +Paris,FR,2019-06-11 11:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-06-11 10:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-06-11 09:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-06-11 08:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-06-11 07:00:00+00:00,FR04014,no2,58.0,µg/m³ +Paris,FR,2019-06-11 06:00:00+00:00,FR04014,no2,55.4,µg/m³ +Paris,FR,2019-06-11 05:00:00+00:00,FR04014,no2,58.7,µg/m³ +Paris,FR,2019-06-11 04:00:00+00:00,FR04014,no2,52.7,µg/m³ +Paris,FR,2019-06-11 03:00:00+00:00,FR04014,no2,32.3,µg/m³ +Paris,FR,2019-06-11 02:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-06-11 01:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-06-11 00:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-06-10 23:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-06-10 22:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-06-10 
[Data hunk, flattened in extraction: several hundred added CSV rows (one `+`-prefixed record per original diff line) of hourly NO2 measurements in long format: city, country, UTC timestamp, station, parameter, value, unit. The rows visible in this hunk cover stations FR04014 (Paris, FR), BETR801 (Antwerpen, BE), and London Westminster (London, GB), parameter no2 in µg/m³, with observations from 2019-05-07 through 2019-06-17, listed in reverse chronological order per station.]
+London,GB,2019-06-04 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-04 17:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-06-04 16:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-06-04 15:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-06-04 14:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-04 13:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-06-04 12:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-04 11:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-04 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-04 09:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-04 08:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-04 07:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-04 06:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-04 05:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-04 04:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-04 03:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-04 02:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-04 01:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-04 00:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-03 23:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-03 22:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-03 21:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-03 20:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-03 19:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-03 18:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-03 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-03 16:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-03 15:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-03 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-03 13:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-03 12:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-03 11:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-03 10:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-03 09:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-03 08:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-03 07:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-03 06:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-03 05:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-03 04:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-03 03:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-03 02:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-03 01:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-03 00:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-02 23:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-02 22:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-02 21:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-02 20:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 19:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-02 18:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-02 17:00:00+00:00,London Westminster,no2,32.0,µg/m³ 
+London,GB,2019-06-02 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 15:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-02 14:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-02 13:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-02 12:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-02 11:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-02 10:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-02 09:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 08:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-06-02 07:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 06:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 05:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-06-02 04:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-06-02 03:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-02 02:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-02 01:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-06-02 00:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-06-01 23:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-06-01 22:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-06-01 21:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-06-01 20:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-06-01 19:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-06-01 18:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-01 17:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-01 16:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-01 15:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-01 14:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-01 13:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-01 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-01 11:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-01 10:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-01 09:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-01 08:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-01 07:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-01 06:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-01 05:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-01 04:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-01 03:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-01 02:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-01 01:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-01 00:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-31 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-31 20:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-31 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-31 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-31 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 16:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 15:00:00+00:00,London Westminster,no2,24.0,µg/m³ 
+London,GB,2019-05-31 14:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-31 13:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-31 12:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-31 11:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-31 10:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-31 09:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-31 08:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-05-31 07:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-31 06:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-05-31 05:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-31 04:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-31 03:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-05-31 02:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-05-31 01:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-31 00:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-30 23:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-30 22:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-30 21:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-30 20:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-30 19:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-30 18:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-30 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-30 16:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-30 15:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-30 14:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-30 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-30 12:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-30 11:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-05-30 10:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-30 09:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-30 08:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-05-30 07:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-05-30 06:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 05:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 04:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 03:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 02:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 01:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-05-30 00:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-05-29 23:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 22:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 21:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-05-29 20:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-05-29 19:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 18:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 17:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 16:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-05-29 15:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 14:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-29 13:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-05-29 
12:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-29 11:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-29 10:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-29 09:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-29 08:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-29 07:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-29 06:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-29 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-29 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-29 03:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-29 02:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-29 01:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-29 00:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-28 23:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-28 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-28 20:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 19:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 17:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-28 16:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-28 15:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-28 14:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-28 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-28 11:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-28 10:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-28 09:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-28 08:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-28 07:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-28 06:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-28 05:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-28 04:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-28 03:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-28 02:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-28 01:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-28 00:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-27 23:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 22:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 21:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-27 20:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-27 19:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 18:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 17:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 16:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 15:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 14:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-27 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-27 12:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-27 11:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-27 10:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-27 
09:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 08:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 07:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 06:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 03:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-27 02:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-27 01:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-27 00:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 21:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-26 20:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-26 19:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 18:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-26 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 16:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-26 15:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 14:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-26 13:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-26 12:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-26 11:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-26 10:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-26 09:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-26 08:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-26 07:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-26 06:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-26 05:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-26 04:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-26 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 01:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-26 00:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-25 23:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-25 22:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-25 21:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-25 20:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-25 19:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-25 18:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-25 17:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-25 16:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-25 15:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-25 14:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-25 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-25 12:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-25 11:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-25 10:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-25 09:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-25 08:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-25 
07:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-25 06:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-25 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-25 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-25 03:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-25 02:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-25 01:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-25 00:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-24 23:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-24 22:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-24 21:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-24 20:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-24 19:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-24 18:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-24 17:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-24 16:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-05-24 15:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-24 14:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-24 13:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-24 12:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-24 11:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-24 10:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-24 09:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-24 08:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-24 07:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-24 06:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-24 05:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-24 04:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-24 03:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-24 02:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-24 00:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-23 23:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-23 22:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-23 21:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-23 20:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-05-23 19:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-05-23 18:00:00+00:00,London Westminster,no2,54.0,µg/m³ +London,GB,2019-05-23 17:00:00+00:00,London Westminster,no2,60.0,µg/m³ +London,GB,2019-05-23 16:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-05-23 15:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-23 14:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-23 13:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-23 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-23 11:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-23 10:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-23 09:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-23 08:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-23 07:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-23 06:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-23 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-23 
04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-23 03:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-23 02:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-23 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-23 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-22 23:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-22 22:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-22 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 20:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-22 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-22 18:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-22 17:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-22 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-22 15:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-22 14:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-22 13:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-22 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-22 11:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-22 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 09:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 08:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 07:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-22 06:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-22 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-22 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-22 03:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-22 02:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-22 01:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-22 00:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-21 23:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 22:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 21:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 20:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-21 19:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-21 18:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-21 17:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-21 16:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-21 15:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-21 14:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 13:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-21 12:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 11:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 10:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-21 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-21 08:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-21 07:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-21 06:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-21 05:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-21 04:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-21 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 
02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 01:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-21 00:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-20 23:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-20 22:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-20 21:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-20 20:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 19:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 18:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-20 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-20 16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-20 15:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 14:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 13:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-20 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 11:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 10:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-20 08:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-20 07:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-20 06:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-20 05:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-20 04:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-20 03:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 02:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 01:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 00:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-19 23:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 22:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 21:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 20:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 19:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-19 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-19 17:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-19 16:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-19 15:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-19 14:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-19 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-19 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-19 11:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-19 10:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-19 09:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-19 08:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-19 07:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-19 06:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-19 05:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 04:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 03:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 02:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 01:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 
00:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-18 23:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-18 22:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-18 21:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-18 20:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-18 19:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-18 18:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-18 17:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-18 16:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-18 15:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-18 14:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-18 13:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 12:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-18 11:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-18 10:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-18 09:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-18 08:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-18 07:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 06:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-18 05:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 04:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 03:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-18 02:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-18 01:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-18 00:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-17 23:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-17 22:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-17 21:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-17 20:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-17 19:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-17 18:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-17 17:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-17 16:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 15:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-17 14:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-17 13:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-17 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 11:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 10:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-17 08:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-17 07:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-17 06:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-17 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 03:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 02:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-17 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-16 23:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-16 
22:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-16 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-16 20:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 19:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-16 18:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-16 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-16 16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-16 15:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 14:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-16 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-16 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-16 11:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-16 09:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-16 08:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 07:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-16 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 05:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 04:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 03:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-16 02:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-16 01:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 00:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 23:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 22:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 21:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-15 20:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-15 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 18:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-15 17:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 16:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-15 15:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-15 14:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-15 13:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-15 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-15 11:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-15 10:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-15 09:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-15 08:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-15 07:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 05:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-15 04:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-15 03:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-15 02:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-15 00:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-14 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-14 20:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-14 
19:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-14 18:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 17:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-14 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-14 15:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-14 14:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-14 13:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-14 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-14 11:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-14 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-14 09:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 08:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-14 07:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-14 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-14 05:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 04:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-14 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-13 23:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 22:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 20:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 19:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 18:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-13 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-13 16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-13 15:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-13 14:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-13 13:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-13 12:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-13 11:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-13 10:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-13 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-13 08:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 07:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-13 06:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-13 05:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-13 04:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-13 03:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-13 02:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-13 01:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-13 00:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 23:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 22:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 21:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-12 20:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 19:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 18:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 
17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-12 16:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-12 15:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 14:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-12 13:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-12 12:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-12 11:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-12 10:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-12 09:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-12 08:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-12 07:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-12 06:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-12 05:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-12 04:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-12 03:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-12 02:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-12 01:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-12 00:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-11 23:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-11 22:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-11 21:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-11 20:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-11 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-11 18:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-11 17:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-11 16:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-11 15:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-11 09:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-11 08:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-11 07:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-11 06:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-11 05:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-11 04:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-11 03:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-11 02:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-11 01:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-11 00:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-10 23:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-10 22:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-10 21:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-10 20:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-10 19:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-10 18:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-10 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-10 16:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-10 15:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-10 14:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-10 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-10 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-10 11:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-10 
10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-10 09:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-10 08:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-10 07:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-10 06:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-10 05:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-10 04:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-10 03:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-10 02:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-10 01:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-05-10 00:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-05-09 23:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-05-09 22:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-05-09 21:00:00+00:00,London Westminster,no2,65.0,µg/m³ +London,GB,2019-05-09 20:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-05-09 19:00:00+00:00,London Westminster,no2,62.0,µg/m³ +London,GB,2019-05-09 18:00:00+00:00,London Westminster,no2,58.0,µg/m³ +London,GB,2019-05-09 17:00:00+00:00,London Westminster,no2,60.0,µg/m³ +London,GB,2019-05-09 16:00:00+00:00,London Westminster,no2,67.0,µg/m³ +London,GB,2019-05-09 15:00:00+00:00,London Westminster,no2,97.0,µg/m³ +London,GB,2019-05-09 14:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-09 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-09 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-09 11:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-09 10:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-09 09:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-09 08:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-09 07:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 05:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 04:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 03:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-09 02:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-09 00:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-08 23:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-08 21:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-08 20:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-08 19:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-08 18:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-08 17:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-08 16:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-08 15:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-08 14:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-08 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-08 12:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-08 11:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-08 10:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-08 09:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-08 08:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-08 07:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-08 
06:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-08 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-08 04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-08 03:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-08 02:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-08 01:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-08 00:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-07 23:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-07 21:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-07 20:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-07 19:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 18:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 17:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-07 16:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 15:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 14:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-07 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 12:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-07 11:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-07 10:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-07 09:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-07 08:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-07 07:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-07 06:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-07 04:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-07 03:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-07 02:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-07 01:00:00+00:00,London Westminster,no2,23.0,µg/m³
diff --git a/doc/data/air_quality_parameters.csv b/doc/data/air_quality_parameters.csv
new file mode 100644
index 0000000000000..915f6300e43b8
--- /dev/null
+++ b/doc/data/air_quality_parameters.csv
@@ -0,0 +1,8 @@
+id,description,name
+bc,Black Carbon,BC
+co,Carbon Monoxide,CO
+no2,Nitrogen Dioxide,NO2
+o3,Ozone,O3
+pm10,Particulate matter less than 10 micrometers in diameter,PM10
+pm25,Particulate matter less than 2.5 micrometers in diameter,PM2.5
+so2,Sulfur Dioxide,SO2
diff --git a/doc/data/air_quality_pm25_long.csv b/doc/data/air_quality_pm25_long.csv
new file mode 100644
index 0000000000000..f74053c249339
--- /dev/null
+++ b/doc/data/air_quality_pm25_long.csv
@@ -0,0 +1,1111 @@
+city,country,date.utc,location,parameter,value,unit
+Antwerpen,BE,2019-06-18 06:00:00+00:00,BETR801,pm25,18.0,µg/m³ +Antwerpen,BE,2019-06-17 08:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-06-17 07:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-06-17 06:00:00+00:00,BETR801,pm25,16.0,µg/m³ +Antwerpen,BE,2019-06-17 05:00:00+00:00,BETR801,pm25,7.5,µg/m³ +Antwerpen,BE,2019-06-17 04:00:00+00:00,BETR801,pm25,7.5,µg/m³ +Antwerpen,BE,2019-06-17 03:00:00+00:00,BETR801,pm25,7.0,µg/m³ +Antwerpen,BE,2019-06-17 02:00:00+00:00,BETR801,pm25,7.0,µg/m³ +Antwerpen,BE,2019-06-17 01:00:00+00:00,BETR801,pm25,8.0,µg/m³ +Antwerpen,BE,2019-06-16 01:00:00+00:00,BETR801,pm25,15.0,µg/m³ +Antwerpen,BE,2019-06-15 01:00:00+00:00,BETR801,pm25,11.0,µg/m³ +Antwerpen,BE,2019-06-14 09:00:00+00:00,BETR801,pm25,12.0,µg/m³ +Antwerpen,BE,2019-06-13 01:00:00+00:00,BETR801,pm25,3.0,µg/m³
+Antwerpen,BE,2019-06-12 01:00:00+00:00,BETR801,pm25,16.0,µg/m³ +Antwerpen,BE,2019-06-11 01:00:00+00:00,BETR801,pm25,3.5,µg/m³ +Antwerpen,BE,2019-06-10 01:00:00+00:00,BETR801,pm25,8.5,µg/m³ +Antwerpen,BE,2019-06-09 01:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-06-08 01:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-06-06 01:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-06-05 01:00:00+00:00,BETR801,pm25,11.0,µg/m³ +Antwerpen,BE,2019-06-04 01:00:00+00:00,BETR801,pm25,10.5,µg/m³ +Antwerpen,BE,2019-06-03 01:00:00+00:00,BETR801,pm25,12.5,µg/m³ +Antwerpen,BE,2019-06-02 01:00:00+00:00,BETR801,pm25,19.0,µg/m³ +Antwerpen,BE,2019-06-01 01:00:00+00:00,BETR801,pm25,9.0,µg/m³ +Antwerpen,BE,2019-05-31 01:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-05-30 01:00:00+00:00,BETR801,pm25,5.0,µg/m³ +Antwerpen,BE,2019-05-29 01:00:00+00:00,BETR801,pm25,5.5,µg/m³ +Antwerpen,BE,2019-05-28 01:00:00+00:00,BETR801,pm25,7.0,µg/m³ +Antwerpen,BE,2019-05-27 01:00:00+00:00,BETR801,pm25,7.5,µg/m³ +Antwerpen,BE,2019-05-26 01:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-05-25 01:00:00+00:00,BETR801,pm25,10.0,µg/m³ +Antwerpen,BE,2019-05-24 01:00:00+00:00,BETR801,pm25,13.0,µg/m³ +Antwerpen,BE,2019-05-23 01:00:00+00:00,BETR801,pm25,7.5,µg/m³ +Antwerpen,BE,2019-05-22 01:00:00+00:00,BETR801,pm25,15.5,µg/m³ +Antwerpen,BE,2019-05-21 01:00:00+00:00,BETR801,pm25,20.5,µg/m³ +Antwerpen,BE,2019-05-20 17:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-05-20 16:00:00+00:00,BETR801,pm25,17.0,µg/m³ +Antwerpen,BE,2019-05-20 15:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-05-20 14:00:00+00:00,BETR801,pm25,14.5,µg/m³ +Antwerpen,BE,2019-05-20 13:00:00+00:00,BETR801,pm25,17.0,µg/m³ +Antwerpen,BE,2019-05-20 12:00:00+00:00,BETR801,pm25,17.5,µg/m³ +Antwerpen,BE,2019-05-20 11:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-20 10:00:00+00:00,BETR801,pm25,10.5,µg/m³ +Antwerpen,BE,2019-05-20 09:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-20 08:00:00+00:00,BETR801,pm25,19.5,µg/m³ +Antwerpen,BE,2019-05-20 07:00:00+00:00,BETR801,pm25,23.5,µg/m³ +Antwerpen,BE,2019-05-20 06:00:00+00:00,BETR801,pm25,22.0,µg/m³ +Antwerpen,BE,2019-05-20 05:00:00+00:00,BETR801,pm25,25.0,µg/m³ +Antwerpen,BE,2019-05-20 04:00:00+00:00,BETR801,pm25,24.5,µg/m³ +Antwerpen,BE,2019-05-20 03:00:00+00:00,BETR801,pm25,15.0,µg/m³ +Antwerpen,BE,2019-05-20 02:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-05-20 01:00:00+00:00,BETR801,pm25,28.0,µg/m³ +Antwerpen,BE,2019-05-19 21:00:00+00:00,BETR801,pm25,35.5,µg/m³ +Antwerpen,BE,2019-05-19 20:00:00+00:00,BETR801,pm25,40.0,µg/m³ +Antwerpen,BE,2019-05-19 19:00:00+00:00,BETR801,pm25,43.5,µg/m³ +Antwerpen,BE,2019-05-19 18:00:00+00:00,BETR801,pm25,35.0,µg/m³ +Antwerpen,BE,2019-05-19 17:00:00+00:00,BETR801,pm25,34.0,µg/m³ +Antwerpen,BE,2019-05-19 16:00:00+00:00,BETR801,pm25,36.5,µg/m³ +Antwerpen,BE,2019-05-19 15:00:00+00:00,BETR801,pm25,44.0,µg/m³ +Antwerpen,BE,2019-05-19 14:00:00+00:00,BETR801,pm25,43.5,µg/m³ +Antwerpen,BE,2019-05-19 13:00:00+00:00,BETR801,pm25,46.0,µg/m³ +Antwerpen,BE,2019-05-19 12:00:00+00:00,BETR801,pm25,43.0,µg/m³ +Antwerpen,BE,2019-05-19 11:00:00+00:00,BETR801,pm25,41.0,µg/m³ +Antwerpen,BE,2019-05-19 10:00:00+00:00,BETR801,pm25,41.5,µg/m³ +Antwerpen,BE,2019-05-19 09:00:00+00:00,BETR801,pm25,42.5,µg/m³ +Antwerpen,BE,2019-05-19 08:00:00+00:00,BETR801,pm25,51.5,µg/m³ +Antwerpen,BE,2019-05-19 07:00:00+00:00,BETR801,pm25,56.0,µg/m³ +Antwerpen,BE,2019-05-19 06:00:00+00:00,BETR801,pm25,58.5,µg/m³ +Antwerpen,BE,2019-05-19 
05:00:00+00:00,BETR801,pm25,60.0,µg/m³ +Antwerpen,BE,2019-05-19 04:00:00+00:00,BETR801,pm25,56.5,µg/m³ +Antwerpen,BE,2019-05-19 03:00:00+00:00,BETR801,pm25,52.5,µg/m³ +Antwerpen,BE,2019-05-19 02:00:00+00:00,BETR801,pm25,51.5,µg/m³ +Antwerpen,BE,2019-05-19 01:00:00+00:00,BETR801,pm25,52.0,µg/m³ +Antwerpen,BE,2019-05-19 00:00:00+00:00,BETR801,pm25,49.5,µg/m³ +Antwerpen,BE,2019-05-18 23:00:00+00:00,BETR801,pm25,45.5,µg/m³ +Antwerpen,BE,2019-05-18 22:00:00+00:00,BETR801,pm25,42.0,µg/m³ +Antwerpen,BE,2019-05-18 21:00:00+00:00,BETR801,pm25,40.5,µg/m³ +Antwerpen,BE,2019-05-18 20:00:00+00:00,BETR801,pm25,41.0,µg/m³ +Antwerpen,BE,2019-05-18 19:00:00+00:00,BETR801,pm25,36.5,µg/m³ +Antwerpen,BE,2019-05-18 18:00:00+00:00,BETR801,pm25,37.0,µg/m³ +Antwerpen,BE,2019-05-18 01:00:00+00:00,BETR801,pm25,24.0,µg/m³ +Antwerpen,BE,2019-05-17 01:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-16 01:00:00+00:00,BETR801,pm25,11.0,µg/m³ +Antwerpen,BE,2019-05-15 02:00:00+00:00,BETR801,pm25,12.5,µg/m³ +Antwerpen,BE,2019-05-15 01:00:00+00:00,BETR801,pm25,13.0,µg/m³ +Antwerpen,BE,2019-05-14 02:00:00+00:00,BETR801,pm25,4.0,µg/m³ +Antwerpen,BE,2019-05-14 01:00:00+00:00,BETR801,pm25,4.0,µg/m³ +Antwerpen,BE,2019-05-13 02:00:00+00:00,BETR801,pm25,5.5,µg/m³ +Antwerpen,BE,2019-05-13 01:00:00+00:00,BETR801,pm25,5.0,µg/m³ +Antwerpen,BE,2019-05-12 02:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-05-12 01:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-05-11 02:00:00+00:00,BETR801,pm25,19.5,µg/m³ +Antwerpen,BE,2019-05-11 01:00:00+00:00,BETR801,pm25,17.0,µg/m³ +Antwerpen,BE,2019-05-10 02:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-10 01:00:00+00:00,BETR801,pm25,11.5,µg/m³ +Antwerpen,BE,2019-05-09 02:00:00+00:00,BETR801,pm25,3.5,µg/m³ +Antwerpen,BE,2019-05-09 01:00:00+00:00,BETR801,pm25,4.5,µg/m³ +Antwerpen,BE,2019-05-08 02:00:00+00:00,BETR801,pm25,14.0,µg/m³ +Antwerpen,BE,2019-05-08 01:00:00+00:00,BETR801,pm25,14.5,µg/m³ +Antwerpen,BE,2019-05-07 02:00:00+00:00,BETR801,pm25,14.0,µg/m³ +Antwerpen,BE,2019-05-07 01:00:00+00:00,BETR801,pm25,12.5,µg/m³ +London,GB,2019-06-21 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-20 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-20 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-20 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-19 13:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-19 12:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-19 11:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-19 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-06-18 23:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-06-18 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-06-18 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-18 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 08:00:00+00:00,London 
Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 04:00:00+00:00,London 
Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-15 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-14 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-14 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 22:00:00+00:00,London 
Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-13 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-13 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-13 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 16:00:00+00:00,London 
Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 13:00:00+00:00,London 
Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-08 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-08 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-08 03:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-08 02:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-08 00:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 23:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 21:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 20:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 19:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 18:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 17:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 16:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 15:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 14:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 13:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 12:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 11:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 10:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 09:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 
08:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 07:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-06-07 06:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-06-07 05:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-06-07 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-07 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-07 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-07 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-07 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-06 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-06 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 
05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-05 03:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-05 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-05 01:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 00:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 23:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ 
+London,GB,2019-06-03 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-01 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ 
+London,GB,2019-06-01 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 22:00:00+00:00,London Westminster,pm25,5.0,µg/m³ +London,GB,2019-05-31 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ 
+London,GB,2019-05-29 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ 
+London,GB,2019-05-27 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-25 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ 
+London,GB,2019-05-25 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-25 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-25 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 03:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-25 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-25 01:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-25 00:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-24 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-24 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-24 21:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 20:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 19:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 18:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 16:00:00+00:00,London 
Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 14:00:00+00:00,London 
Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 10:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-21 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-21 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-21 07:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-21 06:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-21 05:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-21 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-21 03:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-21 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-21 01:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-21 00:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 23:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 22:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 21:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 20:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 19:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 18:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 17:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-20 16:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-20 15:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 14:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 13:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 12:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 11:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 10:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-20 09:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 08:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-20 07:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-20 06:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-20 05:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-20 04:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 03:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 02:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 01:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 00:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 23:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 22:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 21:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 20:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 19:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 18:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 17:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 16:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 15:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 14:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 13:00:00+00:00,London Westminster,pm25,19.0,µg/m³ 
+London,GB,2019-05-19 12:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 11:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 10:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 09:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 08:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 07:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 06:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 05:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 04:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 03:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 02:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 01:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 00:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 23:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 22:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 21:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 20:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 19:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 18:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 17:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 16:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 15:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 14:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 13:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 12:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 11:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 10:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 09:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-18 08:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-18 07:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-18 06:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-18 05:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-18 04:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-18 03:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-18 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-18 01:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-18 00:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-17 23:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-17 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 21:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 20:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 19:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 
11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 07:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 06:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 05:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 03:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 01:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-16 23:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-16 22:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-16 21:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-16 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 18:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 17:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 16:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 15:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 14:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 13:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 12:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 11:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 10:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 09:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 08:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 07:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 06:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 05:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 04:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 03:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 02:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 01:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 00:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-15 23:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-15 22:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-15 21:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-15 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-15 19:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 18:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 17:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 16:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 15:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 14:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 13:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 12:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-15 10:00:00+00:00,London 
Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-15 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-15 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 03:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 02:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 00:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-14 23:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-14 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-13 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-13 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-13 07:00:00+00:00,London 
Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-12 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-12 22:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 21:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 20:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 19:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 18:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-12 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-12 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-12 10:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 09:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 08:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 07:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-12 05:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-12 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-12 03:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-12 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-12 01:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-12 00:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-11 23:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-11 22:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 21:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 18:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 17:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 16:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 15:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-11 09:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 08:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 07:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-11 05:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-11 04:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-11 03:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-11 02:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-11 01:00:00+00:00,London Westminster,pm25,10.0,µg/m³ 
+London,GB,2019-05-11 00:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-10 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-10 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 21:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 20:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 19:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 18:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 17:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 16:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 15:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 14:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 13:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 12:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 11:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 10:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-10 09:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 08:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 07:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 05:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 03:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 02:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 01:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-09 23:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-09 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-09 21:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-09 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 03:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 02:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 00:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 23:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 
21:00:00+00:00,London Westminster,pm25,9.0,µg/m³
+London,GB,2019-05-08 20:00:00+00:00,London Westminster,pm25,9.0,µg/m³
+London,GB,2019-05-08 19:00:00+00:00,London Westminster,pm25,9.0,µg/m³
+London,GB,2019-05-08 18:00:00+00:00,London Westminster,pm25,9.0,µg/m³
+London,GB,2019-05-08 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³
+London,GB,2019-05-08 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³
+London,GB,2019-05-08 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³
+London,GB,2019-05-08 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-08 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-08 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-08 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-08 10:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-08 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-08 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-08 07:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-08 06:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-08 05:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-08 04:00:00+00:00,London Westminster,pm25,11.0,µg/m³
+London,GB,2019-05-08 03:00:00+00:00,London Westminster,pm25,11.0,µg/m³
+London,GB,2019-05-08 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-08 01:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-08 00:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-07 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-07 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-07 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-07 19:00:00+00:00,London Westminster,pm25,11.0,µg/m³
+London,GB,2019-05-07 18:00:00+00:00,London Westminster,pm25,11.0,µg/m³
+London,GB,2019-05-07 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-07 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-07 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-07 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-07 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-07 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-07 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-07 10:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-07 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-07 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³
+London,GB,2019-05-07 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³
+London,GB,2019-05-07 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³
+London,GB,2019-05-07 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³
+London,GB,2019-05-07 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³
+London,GB,2019-05-07 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³
+London,GB,2019-05-07 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³
diff --git a/doc/data/air_quality_stations.csv b/doc/data/air_quality_stations.csv
new file mode 100644
index 0000000000000..9ab1a377dcdae
--- /dev/null
+++ b/doc/data/air_quality_stations.csv
@@ -0,0 +1,67 @@
+location,coordinates.latitude,coordinates.longitude
+BELAL01,51.23619,4.38522
+BELHB23,51.1703,4.341
+BELLD01,51.10998,5.00486
+BELLD02,51.12038,5.02155
+BELR833,51.32766,4.36226
+BELSA04,51.31393,4.40387
+BELWZ02,51.1928,5.22153
+BETM802,51.26099,4.4244
+BETN016,51.23365,5.16398
+BETR801,51.20966,4.43182
+BETR802,51.20952,4.43179
+BETR803,51.22863,4.42845
+BETR805,51.20823,4.42156
+BETR811,51.2521,4.49136
+BETR815,51.2147,4.33221
+BETR817,51.17713,4.41795
+BETR820,51.32042,4.44481
+BETR822,51.26429,4.34128
+BETR831,51.3488,4.33971
+BETR834,51.092,4.3801
+BETR891,51.25581,4.38536
+BETR893,51.28138,4.38577
+BETR894,51.2835,4.3495
+BETR897,51.25011,4.3421
+FR04004,48.89167,2.34667
+FR04012,48.82778,2.3275
+FR04014,48.83724,2.3939
+FR04014,48.83722,2.3939
+FR04031,48.86887,2.31194
+FR04031,48.86889,2.31194
+FR04037,48.82861,2.36028
+FR04060,48.8572,2.2933
+FR04071,48.8564,2.33528
+FR04071,48.85639,2.33528
+FR04118,48.87027,2.3325
+FR04118,48.87029,2.3325
+FR04131,48.87333,2.33028
+FR04135,48.83795,2.40806
+FR04135,48.83796,2.40806
+FR04141,48.85278,2.36056
+FR04141,48.85279,2.36056
+FR04143,48.859,2.351
+FR04143,48.85944,2.35111
+FR04179,48.83038,2.26989
+FR04329,48.8386,2.41279
+FR04329,48.83862,2.41278
+Camden Kerbside,51.54421,-0.17527
+Ealing Horn Lane,51.51895,-0.26562
+Haringey Roadside,51.5993,-0.06822
+London Bexley,51.46603,0.18481
+London Bloomsbury,51.52229,-0.12589
+London Eltham,51.45258,0.07077
+London Haringey Priory Park South,51.58413,-0.12525
+London Harlington,51.48879,-0.44161
+London Harrow Stanmore,51.61733,-0.29878
+London Hillingdon,51.49633,-0.46086
+London Marylebone Road,51.52253,-0.15461
+London N. Kensington,51.52105,-0.21349
+London Teddington,51.42099,-0.33965
+London Teddington Bushy Park,51.42529,-0.34561
+London Westminster,51.49467,-0.13193
+Southend-on-Sea,51.5442,0.67841
+Southwark A2 Old Kent Road,51.4805,-0.05955
+Thurrock,51.47707,0.31797
+Tower Hamlets Roadside,51.52253,-0.04216
+Groton Fort Griswold,41.3536,-72.0789
diff --git a/doc/data/titanic.csv b/doc/data/titanic.csv
new file mode 100644
index 0000000000000..5cc466e97cf12
--- /dev/null
+++ b/doc/data/titanic.csv
@@ -0,0 +1,892 @@
+PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
+1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,,S
+2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38,1,0,PC 17599,71.2833,C85,C
+3,1,3,"Heikkinen, Miss. Laina",female,26,0,0,STON/O2. 3101282,7.925,,S
+4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35,1,0,113803,53.1,C123,S
+5,0,3,"Allen, Mr. William Henry",male,35,0,0,373450,8.05,,S
+6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
+7,0,1,"McCarthy, Mr. Timothy J",male,54,0,0,17463,51.8625,E46,S
+8,0,3,"Palsson, Master. Gosta Leonard",male,2,3,1,349909,21.075,,S
+9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27,0,2,347742,11.1333,,S
+10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14,1,0,237736,30.0708,,C
+11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4,1,1,PP 9549,16.7,G6,S
+12,1,1,"Bonnell, Miss. Elizabeth",female,58,0,0,113783,26.55,C103,S
+13,0,3,"Saundercock, Mr. William Henry",male,20,0,0,A/5. 2151,8.05,,S
+14,0,3,"Andersson, Mr. Anders Johan",male,39,1,5,347082,31.275,,S
+15,0,3,"Vestrom, Miss. Hulda Amanda Adolfina",female,14,0,0,350406,7.8542,,S
+16,1,2,"Hewlett, Mrs. (Mary D Kingcome) ",female,55,0,0,248706,16,,S
+17,0,3,"Rice, Master. Eugene",male,2,4,1,382652,29.125,,Q
+18,1,2,"Williams, Mr. Charles Eugene",male,,0,0,244373,13,,S
+19,0,3,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",female,31,1,0,345763,18,,S
+20,1,3,"Masselmani, Mrs. Fatima",female,,0,0,2649,7.225,,C
+21,0,2,"Fynney, Mr. Joseph J",male,35,0,0,239865,26,,S
+22,1,2,"Beesley, Mr. Lawrence",male,34,0,0,248698,13,D56,S
+23,1,3,"McGowan, Miss. 
Anna ""Annie""",female,15,0,0,330923,8.0292,,Q +24,1,1,"Sloper, Mr. William Thompson",male,28,0,0,113788,35.5,A6,S +25,0,3,"Palsson, Miss. Torborg Danira",female,8,3,1,349909,21.075,,S +26,1,3,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",female,38,1,5,347077,31.3875,,S +27,0,3,"Emir, Mr. Farred Chehab",male,,0,0,2631,7.225,,C +28,0,1,"Fortune, Mr. Charles Alexander",male,19,3,2,19950,263,C23 C25 C27,S +29,1,3,"O'Dwyer, Miss. Ellen ""Nellie""",female,,0,0,330959,7.8792,,Q +30,0,3,"Todoroff, Mr. Lalio",male,,0,0,349216,7.8958,,S +31,0,1,"Uruchurtu, Don. Manuel E",male,40,0,0,PC 17601,27.7208,,C +32,1,1,"Spencer, Mrs. William Augustus (Marie Eugenie)",female,,1,0,PC 17569,146.5208,B78,C +33,1,3,"Glynn, Miss. Mary Agatha",female,,0,0,335677,7.75,,Q +34,0,2,"Wheadon, Mr. Edward H",male,66,0,0,C.A. 24579,10.5,,S +35,0,1,"Meyer, Mr. Edgar Joseph",male,28,1,0,PC 17604,82.1708,,C +36,0,1,"Holverson, Mr. Alexander Oskar",male,42,1,0,113789,52,,S +37,1,3,"Mamee, Mr. Hanna",male,,0,0,2677,7.2292,,C +38,0,3,"Cann, Mr. Ernest Charles",male,21,0,0,A./5. 2152,8.05,,S +39,0,3,"Vander Planke, Miss. Augusta Maria",female,18,2,0,345764,18,,S +40,1,3,"Nicola-Yarred, Miss. Jamila",female,14,1,0,2651,11.2417,,C +41,0,3,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",female,40,1,0,7546,9.475,,S +42,0,2,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",female,27,1,0,11668,21,,S +43,0,3,"Kraeff, Mr. Theodor",male,,0,0,349253,7.8958,,C +44,1,2,"Laroche, Miss. Simonne Marie Anne Andree",female,3,1,2,SC/Paris 2123,41.5792,,C +45,1,3,"Devaney, Miss. Margaret Delia",female,19,0,0,330958,7.8792,,Q +46,0,3,"Rogers, Mr. William John",male,,0,0,S.C./A.4. 23567,8.05,,S +47,0,3,"Lennon, Mr. Denis",male,,1,0,370371,15.5,,Q +48,1,3,"O'Driscoll, Miss. Bridget",female,,0,0,14311,7.75,,Q +49,0,3,"Samaan, Mr. Youssef",male,,2,0,2662,21.6792,,C +50,0,3,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",female,18,1,0,349237,17.8,,S +51,0,3,"Panula, Master. Juha Niilo",male,7,4,1,3101295,39.6875,,S +52,0,3,"Nosworthy, Mr. Richard Cater",male,21,0,0,A/4. 39886,7.8,,S +53,1,1,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",female,49,1,0,PC 17572,76.7292,D33,C +54,1,2,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",female,29,1,0,2926,26,,S +55,0,1,"Ostby, Mr. Engelhart Cornelius",male,65,0,1,113509,61.9792,B30,C +56,1,1,"Woolner, Mr. Hugh",male,,0,0,19947,35.5,C52,S +57,1,2,"Rugg, Miss. Emily",female,21,0,0,C.A. 31026,10.5,,S +58,0,3,"Novel, Mr. Mansouer",male,28.5,0,0,2697,7.2292,,C +59,1,2,"West, Miss. Constance Mirium",female,5,1,2,C.A. 34651,27.75,,S +60,0,3,"Goodwin, Master. William Frederick",male,11,5,2,CA 2144,46.9,,S +61,0,3,"Sirayanian, Mr. Orsen",male,22,0,0,2669,7.2292,,C +62,1,1,"Icard, Miss. Amelie",female,38,0,0,113572,80,B28, +63,0,1,"Harris, Mr. Henry Birkhardt",male,45,1,0,36973,83.475,C83,S +64,0,3,"Skoog, Master. Harald",male,4,3,2,347088,27.9,,S +65,0,1,"Stewart, Mr. Albert A",male,,0,0,PC 17605,27.7208,,C +66,1,3,"Moubarek, Master. Gerios",male,,1,1,2661,15.2458,,C +67,1,2,"Nye, Mrs. (Elizabeth Ramell)",female,29,0,0,C.A. 29395,10.5,F33,S +68,0,3,"Crease, Mr. Ernest James",male,19,0,0,S.P. 3464,8.1583,,S +69,1,3,"Andersson, Miss. Erna Alexandra",female,17,4,2,3101281,7.925,,S +70,0,3,"Kink, Mr. Vincenz",male,26,2,0,315151,8.6625,,S +71,0,2,"Jenkin, Mr. Stephen Curnow",male,32,0,0,C.A. 33111,10.5,,S +72,0,3,"Goodwin, Miss. Lillian Amy",female,16,5,2,CA 2144,46.9,,S +73,0,2,"Hood, Mr. Ambrose Jr",male,21,0,0,S.O.C. 14879,73.5,,S +74,0,3,"Chronopoulos, Mr. 
Apostolos",male,26,1,0,2680,14.4542,,C +75,1,3,"Bing, Mr. Lee",male,32,0,0,1601,56.4958,,S +76,0,3,"Moen, Mr. Sigurd Hansen",male,25,0,0,348123,7.65,F G73,S +77,0,3,"Staneff, Mr. Ivan",male,,0,0,349208,7.8958,,S +78,0,3,"Moutal, Mr. Rahamin Haim",male,,0,0,374746,8.05,,S +79,1,2,"Caldwell, Master. Alden Gates",male,0.83,0,2,248738,29,,S +80,1,3,"Dowdell, Miss. Elizabeth",female,30,0,0,364516,12.475,,S +81,0,3,"Waelens, Mr. Achille",male,22,0,0,345767,9,,S +82,1,3,"Sheerlinck, Mr. Jan Baptist",male,29,0,0,345779,9.5,,S +83,1,3,"McDermott, Miss. Brigdet Delia",female,,0,0,330932,7.7875,,Q +84,0,1,"Carrau, Mr. Francisco M",male,28,0,0,113059,47.1,,S +85,1,2,"Ilett, Miss. Bertha",female,17,0,0,SO/C 14885,10.5,,S +86,1,3,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",female,33,3,0,3101278,15.85,,S +87,0,3,"Ford, Mr. William Neal",male,16,1,3,W./C. 6608,34.375,,S +88,0,3,"Slocovski, Mr. Selman Francis",male,,0,0,SOTON/OQ 392086,8.05,,S +89,1,1,"Fortune, Miss. Mabel Helen",female,23,3,2,19950,263,C23 C25 C27,S +90,0,3,"Celotti, Mr. Francesco",male,24,0,0,343275,8.05,,S +91,0,3,"Christmann, Mr. Emil",male,29,0,0,343276,8.05,,S +92,0,3,"Andreasson, Mr. Paul Edvin",male,20,0,0,347466,7.8542,,S +93,0,1,"Chaffee, Mr. Herbert Fuller",male,46,1,0,W.E.P. 5734,61.175,E31,S +94,0,3,"Dean, Mr. Bertram Frank",male,26,1,2,C.A. 2315,20.575,,S +95,0,3,"Coxon, Mr. Daniel",male,59,0,0,364500,7.25,,S +96,0,3,"Shorney, Mr. Charles Joseph",male,,0,0,374910,8.05,,S +97,0,1,"Goldschmidt, Mr. George B",male,71,0,0,PC 17754,34.6542,A5,C +98,1,1,"Greenfield, Mr. William Bertram",male,23,0,1,PC 17759,63.3583,D10 D12,C +99,1,2,"Doling, Mrs. John T (Ada Julia Bone)",female,34,0,1,231919,23,,S +100,0,2,"Kantor, Mr. Sinai",male,34,1,0,244367,26,,S +101,0,3,"Petranec, Miss. Matilda",female,28,0,0,349245,7.8958,,S +102,0,3,"Petroff, Mr. Pastcho (""Pentcho"")",male,,0,0,349215,7.8958,,S +103,0,1,"White, Mr. Richard Frasar",male,21,0,1,35281,77.2875,D26,S +104,0,3,"Johansson, Mr. Gustaf Joel",male,33,0,0,7540,8.6542,,S +105,0,3,"Gustafsson, Mr. Anders Vilhelm",male,37,2,0,3101276,7.925,,S +106,0,3,"Mionoff, Mr. Stoytcho",male,28,0,0,349207,7.8958,,S +107,1,3,"Salkjelsvik, Miss. Anna Kristine",female,21,0,0,343120,7.65,,S +108,1,3,"Moss, Mr. Albert Johan",male,,0,0,312991,7.775,,S +109,0,3,"Rekic, Mr. Tido",male,38,0,0,349249,7.8958,,S +110,1,3,"Moran, Miss. Bertha",female,,1,0,371110,24.15,,Q +111,0,1,"Porter, Mr. Walter Chamberlain",male,47,0,0,110465,52,C110,S +112,0,3,"Zabour, Miss. Hileni",female,14.5,1,0,2665,14.4542,,C +113,0,3,"Barton, Mr. David John",male,22,0,0,324669,8.05,,S +114,0,3,"Jussila, Miss. Katriina",female,20,1,0,4136,9.825,,S +115,0,3,"Attalah, Miss. Malake",female,17,0,0,2627,14.4583,,C +116,0,3,"Pekoniemi, Mr. Edvard",male,21,0,0,STON/O 2. 3101294,7.925,,S +117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.75,,Q +118,0,2,"Turpin, Mr. William John Robert",male,29,1,0,11668,21,,S +119,0,1,"Baxter, Mr. Quigg Edmond",male,24,0,1,PC 17558,247.5208,B58 B60,C +120,0,3,"Andersson, Miss. Ellis Anna Maria",female,2,4,2,347082,31.275,,S +121,0,2,"Hickman, Mr. Stanley George",male,21,2,0,S.O.C. 14879,73.5,,S +122,0,3,"Moore, Mr. Leonard Charles",male,,0,0,A4. 54510,8.05,,S +123,0,2,"Nasser, Mr. Nicholas",male,32.5,1,0,237736,30.0708,,C +124,1,2,"Webber, Miss. Susan",female,32.5,0,0,27267,13,E101,S +125,0,1,"White, Mr. Percival Wayland",male,54,0,1,35281,77.2875,D26,S +126,1,3,"Nicola-Yarred, Master. Elias",male,12,1,0,2651,11.2417,,C +127,0,3,"McMahon, Mr. 
Martin",male,,0,0,370372,7.75,,Q +128,1,3,"Madsen, Mr. Fridtjof Arne",male,24,0,0,C 17369,7.1417,,S +129,1,3,"Peter, Miss. Anna",female,,1,1,2668,22.3583,F E69,C +130,0,3,"Ekstrom, Mr. Johan",male,45,0,0,347061,6.975,,S +131,0,3,"Drazenoic, Mr. Jozef",male,33,0,0,349241,7.8958,,C +132,0,3,"Coelho, Mr. Domingos Fernandeo",male,20,0,0,SOTON/O.Q. 3101307,7.05,,S +133,0,3,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47,1,0,A/5. 3337,14.5,,S +134,1,2,"Weisz, Mrs. Leopold (Mathilde Francoise Pede)",female,29,1,0,228414,26,,S +135,0,2,"Sobey, Mr. Samuel James Hayden",male,25,0,0,C.A. 29178,13,,S +136,0,2,"Richard, Mr. Emile",male,23,0,0,SC/PARIS 2133,15.0458,,C +137,1,1,"Newsom, Miss. Helen Monypeny",female,19,0,2,11752,26.2833,D47,S +138,0,1,"Futrelle, Mr. Jacques Heath",male,37,1,0,113803,53.1,C123,S +139,0,3,"Osen, Mr. Olaf Elon",male,16,0,0,7534,9.2167,,S +140,0,1,"Giglio, Mr. Victor",male,24,0,0,PC 17593,79.2,B86,C +141,0,3,"Boulos, Mrs. Joseph (Sultana)",female,,0,2,2678,15.2458,,C +142,1,3,"Nysten, Miss. Anna Sofia",female,22,0,0,347081,7.75,,S +143,1,3,"Hakkarainen, Mrs. Pekka Pietari (Elin Matilda Dolck)",female,24,1,0,STON/O2. 3101279,15.85,,S +144,0,3,"Burke, Mr. Jeremiah",male,19,0,0,365222,6.75,,Q +145,0,2,"Andrew, Mr. Edgardo Samuel",male,18,0,0,231945,11.5,,S +146,0,2,"Nicholls, Mr. Joseph Charles",male,19,1,1,C.A. 33112,36.75,,S +147,1,3,"Andersson, Mr. August Edvard (""Wennerstrom"")",male,27,0,0,350043,7.7958,,S +148,0,3,"Ford, Miss. Robina Maggie ""Ruby""",female,9,2,2,W./C. 6608,34.375,,S +149,0,2,"Navratil, Mr. Michel (""Louis M Hoffman"")",male,36.5,0,2,230080,26,F2,S +150,0,2,"Byles, Rev. Thomas Roussel Davids",male,42,0,0,244310,13,,S +151,0,2,"Bateman, Rev. Robert James",male,51,0,0,S.O.P. 1166,12.525,,S +152,1,1,"Pears, Mrs. Thomas (Edith Wearne)",female,22,1,0,113776,66.6,C2,S +153,0,3,"Meo, Mr. Alfonzo",male,55.5,0,0,A.5. 11206,8.05,,S +154,0,3,"van Billiard, Mr. Austin Blyler",male,40.5,0,2,A/5. 851,14.5,,S +155,0,3,"Olsen, Mr. Ole Martin",male,,0,0,Fa 265302,7.3125,,S +156,0,1,"Williams, Mr. Charles Duane",male,51,0,1,PC 17597,61.3792,,C +157,1,3,"Gilnagh, Miss. Katherine ""Katie""",female,16,0,0,35851,7.7333,,Q +158,0,3,"Corn, Mr. Harry",male,30,0,0,SOTON/OQ 392090,8.05,,S +159,0,3,"Smiljanic, Mr. Mile",male,,0,0,315037,8.6625,,S +160,0,3,"Sage, Master. Thomas Henry",male,,8,2,CA. 2343,69.55,,S +161,0,3,"Cribb, Mr. John Hatfield",male,44,0,1,371362,16.1,,S +162,1,2,"Watt, Mrs. James (Elizabeth ""Bessie"" Inglis Milne)",female,40,0,0,C.A. 33595,15.75,,S +163,0,3,"Bengtsson, Mr. John Viktor",male,26,0,0,347068,7.775,,S +164,0,3,"Calic, Mr. Jovo",male,17,0,0,315093,8.6625,,S +165,0,3,"Panula, Master. Eino Viljami",male,1,4,1,3101295,39.6875,,S +166,1,3,"Goldsmith, Master. Frank John William ""Frankie""",male,9,0,2,363291,20.525,,S +167,1,1,"Chibnall, Mrs. (Edith Martha Bowerman)",female,,0,1,113505,55,E33,S +168,0,3,"Skoog, Mrs. William (Anna Bernhardina Karlsson)",female,45,1,4,347088,27.9,,S +169,0,1,"Baumann, Mr. John D",male,,0,0,PC 17318,25.925,,S +170,0,3,"Ling, Mr. Lee",male,28,0,0,1601,56.4958,,S +171,0,1,"Van der hoef, Mr. Wyckoff",male,61,0,0,111240,33.5,B19,S +172,0,3,"Rice, Master. Arthur",male,4,4,1,382652,29.125,,Q +173,1,3,"Johnson, Miss. Eleanor Ileen",female,1,1,1,347742,11.1333,,S +174,0,3,"Sivola, Mr. Antti Wilhelm",male,21,0,0,STON/O 2. 3101280,7.925,,S +175,0,1,"Smith, Mr. James Clinch",male,56,0,0,17764,30.6958,A7,C +176,0,3,"Klasen, Mr. Klas Albin",male,18,1,1,350404,7.8542,,S +177,0,3,"Lefebre, Master. 
Henry Forbes",male,,3,1,4133,25.4667,,S +178,0,1,"Isham, Miss. Ann Elizabeth",female,50,0,0,PC 17595,28.7125,C49,C +179,0,2,"Hale, Mr. Reginald",male,30,0,0,250653,13,,S +180,0,3,"Leonard, Mr. Lionel",male,36,0,0,LINE,0,,S +181,0,3,"Sage, Miss. Constance Gladys",female,,8,2,CA. 2343,69.55,,S +182,0,2,"Pernot, Mr. Rene",male,,0,0,SC/PARIS 2131,15.05,,C +183,0,3,"Asplund, Master. Clarence Gustaf Hugo",male,9,4,2,347077,31.3875,,S +184,1,2,"Becker, Master. Richard F",male,1,2,1,230136,39,F4,S +185,1,3,"Kink-Heilmann, Miss. Luise Gretchen",female,4,0,2,315153,22.025,,S +186,0,1,"Rood, Mr. Hugh Roscoe",male,,0,0,113767,50,A32,S +187,1,3,"O'Brien, Mrs. Thomas (Johanna ""Hannah"" Godfrey)",female,,1,0,370365,15.5,,Q +188,1,1,"Romaine, Mr. Charles Hallace (""Mr C Rolmane"")",male,45,0,0,111428,26.55,,S +189,0,3,"Bourke, Mr. John",male,40,1,1,364849,15.5,,Q +190,0,3,"Turcin, Mr. Stjepan",male,36,0,0,349247,7.8958,,S +191,1,2,"Pinsky, Mrs. (Rosa)",female,32,0,0,234604,13,,S +192,0,2,"Carbines, Mr. William",male,19,0,0,28424,13,,S +193,1,3,"Andersen-Jensen, Miss. Carla Christine Nielsine",female,19,1,0,350046,7.8542,,S +194,1,2,"Navratil, Master. Michel M",male,3,1,1,230080,26,F2,S +195,1,1,"Brown, Mrs. James Joseph (Margaret Tobin)",female,44,0,0,PC 17610,27.7208,B4,C +196,1,1,"Lurette, Miss. Elise",female,58,0,0,PC 17569,146.5208,B80,C +197,0,3,"Mernagh, Mr. Robert",male,,0,0,368703,7.75,,Q +198,0,3,"Olsen, Mr. Karl Siegwart Andreas",male,42,0,1,4579,8.4042,,S +199,1,3,"Madigan, Miss. Margaret ""Maggie""",female,,0,0,370370,7.75,,Q +200,0,2,"Yrois, Miss. Henriette (""Mrs Harbeck"")",female,24,0,0,248747,13,,S +201,0,3,"Vande Walle, Mr. Nestor Cyriel",male,28,0,0,345770,9.5,,S +202,0,3,"Sage, Mr. Frederick",male,,8,2,CA. 2343,69.55,,S +203,0,3,"Johanson, Mr. Jakob Alfred",male,34,0,0,3101264,6.4958,,S +204,0,3,"Youseff, Mr. Gerious",male,45.5,0,0,2628,7.225,,C +205,1,3,"Cohen, Mr. Gurshon ""Gus""",male,18,0,0,A/5 3540,8.05,,S +206,0,3,"Strom, Miss. Telma Matilda",female,2,0,1,347054,10.4625,G6,S +207,0,3,"Backstrom, Mr. Karl Alfred",male,32,1,0,3101278,15.85,,S +208,1,3,"Albimona, Mr. Nassef Cassem",male,26,0,0,2699,18.7875,,C +209,1,3,"Carr, Miss. Helen ""Ellen""",female,16,0,0,367231,7.75,,Q +210,1,1,"Blank, Mr. Henry",male,40,0,0,112277,31,A31,C +211,0,3,"Ali, Mr. Ahmed",male,24,0,0,SOTON/O.Q. 3101311,7.05,,S +212,1,2,"Cameron, Miss. Clear Annie",female,35,0,0,F.C.C. 13528,21,,S +213,0,3,"Perkin, Mr. John Henry",male,22,0,0,A/5 21174,7.25,,S +214,0,2,"Givard, Mr. Hans Kristensen",male,30,0,0,250646,13,,S +215,0,3,"Kiernan, Mr. Philip",male,,1,0,367229,7.75,,Q +216,1,1,"Newell, Miss. Madeleine",female,31,1,0,35273,113.275,D36,C +217,1,3,"Honkanen, Miss. Eliina",female,27,0,0,STON/O2. 3101283,7.925,,S +218,0,2,"Jacobsohn, Mr. Sidney Samuel",male,42,1,0,243847,27,,S +219,1,1,"Bazzani, Miss. Albina",female,32,0,0,11813,76.2917,D15,C +220,0,2,"Harris, Mr. Walter",male,30,0,0,W/C 14208,10.5,,S +221,1,3,"Sunderland, Mr. Victor Francis",male,16,0,0,SOTON/OQ 392089,8.05,,S +222,0,2,"Bracken, Mr. James H",male,27,0,0,220367,13,,S +223,0,3,"Green, Mr. George Henry",male,51,0,0,21440,8.05,,S +224,0,3,"Nenkoff, Mr. Christo",male,,0,0,349234,7.8958,,S +225,1,1,"Hoyt, Mr. Frederick Maxfield",male,38,1,0,19943,90,C93,S +226,0,3,"Berglund, Mr. Karl Ivar Sven",male,22,0,0,PP 4348,9.35,,S +227,1,2,"Mellors, Mr. William John",male,19,0,0,SW/PP 751,10.5,,S +228,0,3,"Lovell, Mr. John Hall (""Henry"")",male,20.5,0,0,A/5 21173,7.25,,S +229,0,2,"Fahlstrom, Mr. Arne Jonas",male,18,0,0,236171,13,,S +230,0,3,"Lefebre, Miss. 
Mathilde",female,,3,1,4133,25.4667,,S +231,1,1,"Harris, Mrs. Henry Birkhardt (Irene Wallach)",female,35,1,0,36973,83.475,C83,S +232,0,3,"Larsson, Mr. Bengt Edvin",male,29,0,0,347067,7.775,,S +233,0,2,"Sjostedt, Mr. Ernst Adolf",male,59,0,0,237442,13.5,,S +234,1,3,"Asplund, Miss. Lillian Gertrud",female,5,4,2,347077,31.3875,,S +235,0,2,"Leyson, Mr. Robert William Norman",male,24,0,0,C.A. 29566,10.5,,S +236,0,3,"Harknett, Miss. Alice Phoebe",female,,0,0,W./C. 6609,7.55,,S +237,0,2,"Hold, Mr. Stephen",male,44,1,0,26707,26,,S +238,1,2,"Collyer, Miss. Marjorie ""Lottie""",female,8,0,2,C.A. 31921,26.25,,S +239,0,2,"Pengelly, Mr. Frederick William",male,19,0,0,28665,10.5,,S +240,0,2,"Hunt, Mr. George Henry",male,33,0,0,SCO/W 1585,12.275,,S +241,0,3,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,,C +242,1,3,"Murphy, Miss. Katherine ""Kate""",female,,1,0,367230,15.5,,Q +243,0,2,"Coleridge, Mr. Reginald Charles",male,29,0,0,W./C. 14263,10.5,,S +244,0,3,"Maenpaa, Mr. Matti Alexanteri",male,22,0,0,STON/O 2. 3101275,7.125,,S +245,0,3,"Attalah, Mr. Sleiman",male,30,0,0,2694,7.225,,C +246,0,1,"Minahan, Dr. William Edward",male,44,2,0,19928,90,C78,Q +247,0,3,"Lindahl, Miss. Agda Thorilda Viktoria",female,25,0,0,347071,7.775,,S +248,1,2,"Hamalainen, Mrs. William (Anna)",female,24,0,2,250649,14.5,,S +249,1,1,"Beckwith, Mr. Richard Leonard",male,37,1,1,11751,52.5542,D35,S +250,0,2,"Carter, Rev. Ernest Courtenay",male,54,1,0,244252,26,,S +251,0,3,"Reed, Mr. James George",male,,0,0,362316,7.25,,S +252,0,3,"Strom, Mrs. Wilhelm (Elna Matilda Persson)",female,29,1,1,347054,10.4625,G6,S +253,0,1,"Stead, Mr. William Thomas",male,62,0,0,113514,26.55,C87,S +254,0,3,"Lobb, Mr. William Arthur",male,30,1,0,A/5. 3336,16.1,,S +255,0,3,"Rosblom, Mrs. Viktor (Helena Wilhelmina)",female,41,0,2,370129,20.2125,,S +256,1,3,"Touma, Mrs. Darwis (Hanne Youssef Razi)",female,29,0,2,2650,15.2458,,C +257,1,1,"Thorne, Mrs. Gertrude Maybelle",female,,0,0,PC 17585,79.2,,C +258,1,1,"Cherry, Miss. Gladys",female,30,0,0,110152,86.5,B77,S +259,1,1,"Ward, Miss. Anna",female,35,0,0,PC 17755,512.3292,,C +260,1,2,"Parrish, Mrs. (Lutie Davis)",female,50,0,1,230433,26,,S +261,0,3,"Smith, Mr. Thomas",male,,0,0,384461,7.75,,Q +262,1,3,"Asplund, Master. Edvin Rojj Felix",male,3,4,2,347077,31.3875,,S +263,0,1,"Taussig, Mr. Emil",male,52,1,1,110413,79.65,E67,S +264,0,1,"Harrison, Mr. William",male,40,0,0,112059,0,B94,S +265,0,3,"Henry, Miss. Delia",female,,0,0,382649,7.75,,Q +266,0,2,"Reeves, Mr. David",male,36,0,0,C.A. 17248,10.5,,S +267,0,3,"Panula, Mr. Ernesti Arvid",male,16,4,1,3101295,39.6875,,S +268,1,3,"Persson, Mr. Ernst Ulrik",male,25,1,0,347083,7.775,,S +269,1,1,"Graham, Mrs. William Thompson (Edith Junkins)",female,58,0,1,PC 17582,153.4625,C125,S +270,1,1,"Bissette, Miss. Amelia",female,35,0,0,PC 17760,135.6333,C99,S +271,0,1,"Cairns, Mr. Alexander",male,,0,0,113798,31,,S +272,1,3,"Tornquist, Mr. William Henry",male,25,0,0,LINE,0,,S +273,1,2,"Mellinger, Mrs. (Elizabeth Anne Maidment)",female,41,0,1,250644,19.5,,S +274,0,1,"Natsch, Mr. Charles H",male,37,0,1,PC 17596,29.7,C118,C +275,1,3,"Healy, Miss. Hanora ""Nora""",female,,0,0,370375,7.75,,Q +276,1,1,"Andrews, Miss. Kornelia Theodosia",female,63,1,0,13502,77.9583,D7,S +277,0,3,"Lindblom, Miss. Augusta Charlotta",female,45,0,0,347073,7.75,,S +278,0,2,"Parkes, Mr. Francis ""Frank""",male,,0,0,239853,0,,S +279,0,3,"Rice, Master. Eric",male,7,4,1,382652,29.125,,Q +280,1,3,"Abbott, Mrs. Stanton (Rosa Hunt)",female,35,1,1,C.A. 2673,20.25,,S +281,0,3,"Duane, Mr. 
Frank",male,65,0,0,336439,7.75,,Q +282,0,3,"Olsson, Mr. Nils Johan Goransson",male,28,0,0,347464,7.8542,,S +283,0,3,"de Pelsmaeker, Mr. Alfons",male,16,0,0,345778,9.5,,S +284,1,3,"Dorking, Mr. Edward Arthur",male,19,0,0,A/5. 10482,8.05,,S +285,0,1,"Smith, Mr. Richard William",male,,0,0,113056,26,A19,S +286,0,3,"Stankovic, Mr. Ivan",male,33,0,0,349239,8.6625,,C +287,1,3,"de Mulder, Mr. Theodore",male,30,0,0,345774,9.5,,S +288,0,3,"Naidenoff, Mr. Penko",male,22,0,0,349206,7.8958,,S +289,1,2,"Hosono, Mr. Masabumi",male,42,0,0,237798,13,,S +290,1,3,"Connolly, Miss. Kate",female,22,0,0,370373,7.75,,Q +291,1,1,"Barber, Miss. Ellen ""Nellie""",female,26,0,0,19877,78.85,,S +292,1,1,"Bishop, Mrs. Dickinson H (Helen Walton)",female,19,1,0,11967,91.0792,B49,C +293,0,2,"Levy, Mr. Rene Jacques",male,36,0,0,SC/Paris 2163,12.875,D,C +294,0,3,"Haas, Miss. Aloisia",female,24,0,0,349236,8.85,,S +295,0,3,"Mineff, Mr. Ivan",male,24,0,0,349233,7.8958,,S +296,0,1,"Lewy, Mr. Ervin G",male,,0,0,PC 17612,27.7208,,C +297,0,3,"Hanna, Mr. Mansour",male,23.5,0,0,2693,7.2292,,C +298,0,1,"Allison, Miss. Helen Loraine",female,2,1,2,113781,151.55,C22 C26,S +299,1,1,"Saalfeld, Mr. Adolphe",male,,0,0,19988,30.5,C106,S +300,1,1,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",female,50,0,1,PC 17558,247.5208,B58 B60,C +301,1,3,"Kelly, Miss. Anna Katherine ""Annie Kate""",female,,0,0,9234,7.75,,Q +302,1,3,"McCoy, Mr. Bernard",male,,2,0,367226,23.25,,Q +303,0,3,"Johnson, Mr. William Cahoone Jr",male,19,0,0,LINE,0,,S +304,1,2,"Keane, Miss. Nora A",female,,0,0,226593,12.35,E101,Q +305,0,3,"Williams, Mr. Howard Hugh ""Harry""",male,,0,0,A/5 2466,8.05,,S +306,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781,151.55,C22 C26,S +307,1,1,"Fleming, Miss. Margaret",female,,0,0,17421,110.8833,,C +308,1,1,"Penasco y Castellana, Mrs. Victor de Satode (Maria Josefa Perez de Soto y Vallejo)",female,17,1,0,PC 17758,108.9,C65,C +309,0,2,"Abelson, Mr. Samuel",male,30,1,0,P/PP 3381,24,,C +310,1,1,"Francatelli, Miss. Laura Mabel",female,30,0,0,PC 17485,56.9292,E36,C +311,1,1,"Hays, Miss. Margaret Bechstein",female,24,0,0,11767,83.1583,C54,C +312,1,1,"Ryerson, Miss. Emily Borie",female,18,2,2,PC 17608,262.375,B57 B59 B63 B66,C +313,0,2,"Lahtinen, Mrs. William (Anna Sylfven)",female,26,1,1,250651,26,,S +314,0,3,"Hendekovic, Mr. Ignjac",male,28,0,0,349243,7.8958,,S +315,0,2,"Hart, Mr. Benjamin",male,43,1,1,F.C.C. 13529,26.25,,S +316,1,3,"Nilsson, Miss. Helmina Josefina",female,26,0,0,347470,7.8542,,S +317,1,2,"Kantor, Mrs. Sinai (Miriam Sternin)",female,24,1,0,244367,26,,S +318,0,2,"Moraweck, Dr. Ernest",male,54,0,0,29011,14,,S +319,1,1,"Wick, Miss. Mary Natalie",female,31,0,2,36928,164.8667,C7,S +320,1,1,"Spedden, Mrs. Frederic Oakley (Margaretta Corning Stone)",female,40,1,1,16966,134.5,E34,C +321,0,3,"Dennis, Mr. Samuel",male,22,0,0,A/5 21172,7.25,,S +322,0,3,"Danoff, Mr. Yoto",male,27,0,0,349219,7.8958,,S +323,1,2,"Slayter, Miss. Hilda Mary",female,30,0,0,234818,12.35,,Q +324,1,2,"Caldwell, Mrs. Albert Francis (Sylvia Mae Harbaugh)",female,22,1,1,248738,29,,S +325,0,3,"Sage, Mr. George John Jr",male,,8,2,CA. 2343,69.55,,S +326,1,1,"Young, Miss. Marie Grice",female,36,0,0,PC 17760,135.6333,C32,C +327,0,3,"Nysveen, Mr. Johan Hansen",male,61,0,0,345364,6.2375,,S +328,1,2,"Ball, Mrs. (Ada E Hall)",female,36,0,0,28551,13,D,S +329,1,3,"Goldsmith, Mrs. Frank John (Emily Alice Brown)",female,31,1,1,363291,20.525,,S +330,1,1,"Hippach, Miss. Jean Gertrude",female,16,0,1,111361,57.9792,B18,C +331,1,3,"McCoy, Miss. 
Agnes",female,,2,0,367226,23.25,,Q +332,0,1,"Partner, Mr. Austen",male,45.5,0,0,113043,28.5,C124,S +333,0,1,"Graham, Mr. George Edward",male,38,0,1,PC 17582,153.4625,C91,S +334,0,3,"Vander Planke, Mr. Leo Edmondus",male,16,2,0,345764,18,,S +335,1,1,"Frauenthal, Mrs. Henry William (Clara Heinsheimer)",female,,1,0,PC 17611,133.65,,S +336,0,3,"Denkoff, Mr. Mitto",male,,0,0,349225,7.8958,,S +337,0,1,"Pears, Mr. Thomas Clinton",male,29,1,0,113776,66.6,C2,S +338,1,1,"Burns, Miss. Elizabeth Margaret",female,41,0,0,16966,134.5,E40,C +339,1,3,"Dahl, Mr. Karl Edwart",male,45,0,0,7598,8.05,,S +340,0,1,"Blackwell, Mr. Stephen Weart",male,45,0,0,113784,35.5,T,S +341,1,2,"Navratil, Master. Edmond Roger",male,2,1,1,230080,26,F2,S +342,1,1,"Fortune, Miss. Alice Elizabeth",female,24,3,2,19950,263,C23 C25 C27,S +343,0,2,"Collander, Mr. Erik Gustaf",male,28,0,0,248740,13,,S +344,0,2,"Sedgwick, Mr. Charles Frederick Waddington",male,25,0,0,244361,13,,S +345,0,2,"Fox, Mr. Stanley Hubert",male,36,0,0,229236,13,,S +346,1,2,"Brown, Miss. Amelia ""Mildred""",female,24,0,0,248733,13,F33,S +347,1,2,"Smith, Miss. Marion Elsie",female,40,0,0,31418,13,,S +348,1,3,"Davison, Mrs. Thomas Henry (Mary E Finck)",female,,1,0,386525,16.1,,S +349,1,3,"Coutts, Master. William Loch ""William""",male,3,1,1,C.A. 37671,15.9,,S +350,0,3,"Dimic, Mr. Jovan",male,42,0,0,315088,8.6625,,S +351,0,3,"Odahl, Mr. Nils Martin",male,23,0,0,7267,9.225,,S +352,0,1,"Williams-Lambert, Mr. Fletcher Fellows",male,,0,0,113510,35,C128,S +353,0,3,"Elias, Mr. Tannous",male,15,1,1,2695,7.2292,,C +354,0,3,"Arnold-Franchi, Mr. Josef",male,25,1,0,349237,17.8,,S +355,0,3,"Yousif, Mr. Wazli",male,,0,0,2647,7.225,,C +356,0,3,"Vanden Steen, Mr. Leo Peter",male,28,0,0,345783,9.5,,S +357,1,1,"Bowerman, Miss. Elsie Edith",female,22,0,1,113505,55,E33,S +358,0,2,"Funk, Miss. Annie Clemmer",female,38,0,0,237671,13,,S +359,1,3,"McGovern, Miss. Mary",female,,0,0,330931,7.8792,,Q +360,1,3,"Mockler, Miss. Helen Mary ""Ellie""",female,,0,0,330980,7.8792,,Q +361,0,3,"Skoog, Mr. Wilhelm",male,40,1,4,347088,27.9,,S +362,0,2,"del Carlo, Mr. Sebastiano",male,29,1,0,SC/PARIS 2167,27.7208,,C +363,0,3,"Barbara, Mrs. (Catherine David)",female,45,0,1,2691,14.4542,,C +364,0,3,"Asim, Mr. Adola",male,35,0,0,SOTON/O.Q. 3101310,7.05,,S +365,0,3,"O'Brien, Mr. Thomas",male,,1,0,370365,15.5,,Q +366,0,3,"Adahl, Mr. Mauritz Nils Martin",male,30,0,0,C 7076,7.25,,S +367,1,1,"Warren, Mrs. Frank Manley (Anna Sophia Atkinson)",female,60,1,0,110813,75.25,D37,C +368,1,3,"Moussa, Mrs. (Mantoura Boulos)",female,,0,0,2626,7.2292,,C +369,1,3,"Jermyn, Miss. Annie",female,,0,0,14313,7.75,,Q +370,1,1,"Aubart, Mme. Leontine Pauline",female,24,0,0,PC 17477,69.3,B35,C +371,1,1,"Harder, Mr. George Achilles",male,25,1,0,11765,55.4417,E50,C +372,0,3,"Wiklund, Mr. Jakob Alfred",male,18,1,0,3101267,6.4958,,S +373,0,3,"Beavan, Mr. William Thomas",male,19,0,0,323951,8.05,,S +374,0,1,"Ringhini, Mr. Sante",male,22,0,0,PC 17760,135.6333,,C +375,0,3,"Palsson, Miss. Stina Viola",female,3,3,1,349909,21.075,,S +376,1,1,"Meyer, Mrs. Edgar Joseph (Leila Saks)",female,,1,0,PC 17604,82.1708,,C +377,1,3,"Landergren, Miss. Aurora Adelia",female,22,0,0,C 7077,7.25,,S +378,0,1,"Widener, Mr. Harry Elkins",male,27,0,2,113503,211.5,C82,C +379,0,3,"Betros, Mr. Tannous",male,20,0,0,2648,4.0125,,C +380,0,3,"Gustafsson, Mr. Karl Gideon",male,19,0,0,347069,7.775,,S +381,1,1,"Bidois, Miss. Rosalie",female,42,0,0,PC 17757,227.525,,C +382,1,3,"Nakid, Miss. Maria (""Mary"")",female,1,0,2,2653,15.7417,,C +383,0,3,"Tikkanen, Mr. 
Juho",male,32,0,0,STON/O 2. 3101293,7.925,,S +384,1,1,"Holverson, Mrs. Alexander Oskar (Mary Aline Towner)",female,35,1,0,113789,52,,S +385,0,3,"Plotcharsky, Mr. Vasil",male,,0,0,349227,7.8958,,S +386,0,2,"Davies, Mr. Charles Henry",male,18,0,0,S.O.C. 14879,73.5,,S +387,0,3,"Goodwin, Master. Sidney Leonard",male,1,5,2,CA 2144,46.9,,S +388,1,2,"Buss, Miss. Kate",female,36,0,0,27849,13,,S +389,0,3,"Sadlier, Mr. Matthew",male,,0,0,367655,7.7292,,Q +390,1,2,"Lehmann, Miss. Bertha",female,17,0,0,SC 1748,12,,C +391,1,1,"Carter, Mr. William Ernest",male,36,1,2,113760,120,B96 B98,S +392,1,3,"Jansson, Mr. Carl Olof",male,21,0,0,350034,7.7958,,S +393,0,3,"Gustafsson, Mr. Johan Birger",male,28,2,0,3101277,7.925,,S +394,1,1,"Newell, Miss. Marjorie",female,23,1,0,35273,113.275,D36,C +395,1,3,"Sandstrom, Mrs. Hjalmar (Agnes Charlotta Bengtsson)",female,24,0,2,PP 9549,16.7,G6,S +396,0,3,"Johansson, Mr. Erik",male,22,0,0,350052,7.7958,,S +397,0,3,"Olsson, Miss. Elina",female,31,0,0,350407,7.8542,,S +398,0,2,"McKane, Mr. Peter David",male,46,0,0,28403,26,,S +399,0,2,"Pain, Dr. Alfred",male,23,0,0,244278,10.5,,S +400,1,2,"Trout, Mrs. William H (Jessie L)",female,28,0,0,240929,12.65,,S +401,1,3,"Niskanen, Mr. Juha",male,39,0,0,STON/O 2. 3101289,7.925,,S +402,0,3,"Adams, Mr. John",male,26,0,0,341826,8.05,,S +403,0,3,"Jussila, Miss. Mari Aina",female,21,1,0,4137,9.825,,S +404,0,3,"Hakkarainen, Mr. Pekka Pietari",male,28,1,0,STON/O2. 3101279,15.85,,S +405,0,3,"Oreskovic, Miss. Marija",female,20,0,0,315096,8.6625,,S +406,0,2,"Gale, Mr. Shadrach",male,34,1,0,28664,21,,S +407,0,3,"Widegren, Mr. Carl/Charles Peter",male,51,0,0,347064,7.75,,S +408,1,2,"Richards, Master. William Rowe",male,3,1,1,29106,18.75,,S +409,0,3,"Birkeland, Mr. Hans Martin Monsen",male,21,0,0,312992,7.775,,S +410,0,3,"Lefebre, Miss. Ida",female,,3,1,4133,25.4667,,S +411,0,3,"Sdycoff, Mr. Todor",male,,0,0,349222,7.8958,,S +412,0,3,"Hart, Mr. Henry",male,,0,0,394140,6.8583,,Q +413,1,1,"Minahan, Miss. Daisy E",female,33,1,0,19928,90,C78,Q +414,0,2,"Cunningham, Mr. Alfred Fleming",male,,0,0,239853,0,,S +415,1,3,"Sundman, Mr. Johan Julian",male,44,0,0,STON/O 2. 3101269,7.925,,S +416,0,3,"Meek, Mrs. Thomas (Annie Louise Rowley)",female,,0,0,343095,8.05,,S +417,1,2,"Drew, Mrs. James Vivian (Lulu Thorne Christian)",female,34,1,1,28220,32.5,,S +418,1,2,"Silven, Miss. Lyyli Karoliina",female,18,0,2,250652,13,,S +419,0,2,"Matthews, Mr. William John",male,30,0,0,28228,13,,S +420,0,3,"Van Impe, Miss. Catharina",female,10,0,2,345773,24.15,,S +421,0,3,"Gheorgheff, Mr. Stanio",male,,0,0,349254,7.8958,,C +422,0,3,"Charters, Mr. David",male,21,0,0,A/5. 13032,7.7333,,Q +423,0,3,"Zimmerman, Mr. Leo",male,29,0,0,315082,7.875,,S +424,0,3,"Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria Brogren)",female,28,1,1,347080,14.4,,S +425,0,3,"Rosblom, Mr. Viktor Richard",male,18,1,1,370129,20.2125,,S +426,0,3,"Wiseman, Mr. Phillippe",male,,0,0,A/4. 34244,7.25,,S +427,1,2,"Clarke, Mrs. Charles V (Ada Maria Winfield)",female,28,1,0,2003,26,,S +428,1,2,"Phillips, Miss. Kate Florence (""Mrs Kate Louise Phillips Marshall"")",female,19,0,0,250655,26,,S +429,0,3,"Flynn, Mr. James",male,,0,0,364851,7.75,,Q +430,1,3,"Pickard, Mr. Berk (Berk Trembisky)",male,32,0,0,SOTON/O.Q. 392078,8.05,E10,S +431,1,1,"Bjornstrom-Steffansson, Mr. Mauritz Hakan",male,28,0,0,110564,26.55,C52,S +432,1,3,"Thorneycroft, Mrs. Percival (Florence Kate White)",female,,1,0,376564,16.1,,S +433,1,2,"Louch, Mrs. Charles Alexander (Alice Adelaide Slow)",female,42,1,0,SC/AH 3085,26,,S +434,0,3,"Kallio, Mr. 
Nikolai Erland",male,17,0,0,STON/O 2. 3101274,7.125,,S +435,0,1,"Silvey, Mr. William Baird",male,50,1,0,13507,55.9,E44,S +436,1,1,"Carter, Miss. Lucile Polk",female,14,1,2,113760,120,B96 B98,S +437,0,3,"Ford, Miss. Doolina Margaret ""Daisy""",female,21,2,2,W./C. 6608,34.375,,S +438,1,2,"Richards, Mrs. Sidney (Emily Hocking)",female,24,2,3,29106,18.75,,S +439,0,1,"Fortune, Mr. Mark",male,64,1,4,19950,263,C23 C25 C27,S +440,0,2,"Kvillner, Mr. Johan Henrik Johannesson",male,31,0,0,C.A. 18723,10.5,,S +441,1,2,"Hart, Mrs. Benjamin (Esther Ada Bloomfield)",female,45,1,1,F.C.C. 13529,26.25,,S +442,0,3,"Hampe, Mr. Leon",male,20,0,0,345769,9.5,,S +443,0,3,"Petterson, Mr. Johan Emil",male,25,1,0,347076,7.775,,S +444,1,2,"Reynaldo, Ms. Encarnacion",female,28,0,0,230434,13,,S +445,1,3,"Johannesen-Bratthammer, Mr. Bernt",male,,0,0,65306,8.1125,,S +446,1,1,"Dodge, Master. Washington",male,4,0,2,33638,81.8583,A34,S +447,1,2,"Mellinger, Miss. Madeleine Violet",female,13,0,1,250644,19.5,,S +448,1,1,"Seward, Mr. Frederic Kimber",male,34,0,0,113794,26.55,,S +449,1,3,"Baclini, Miss. Marie Catherine",female,5,2,1,2666,19.2583,,C +450,1,1,"Peuchen, Major. Arthur Godfrey",male,52,0,0,113786,30.5,C104,S +451,0,2,"West, Mr. Edwy Arthur",male,36,1,2,C.A. 34651,27.75,,S +452,0,3,"Hagland, Mr. Ingvald Olai Olsen",male,,1,0,65303,19.9667,,S +453,0,1,"Foreman, Mr. Benjamin Laventall",male,30,0,0,113051,27.75,C111,C +454,1,1,"Goldenberg, Mr. Samuel L",male,49,1,0,17453,89.1042,C92,C +455,0,3,"Peduzzi, Mr. Joseph",male,,0,0,A/5 2817,8.05,,S +456,1,3,"Jalsevac, Mr. Ivan",male,29,0,0,349240,7.8958,,C +457,0,1,"Millet, Mr. Francis Davis",male,65,0,0,13509,26.55,E38,S +458,1,1,"Kenyon, Mrs. Frederick R (Marion)",female,,1,0,17464,51.8625,D21,S +459,1,2,"Toomey, Miss. Ellen",female,50,0,0,F.C.C. 13531,10.5,,S +460,0,3,"O'Connor, Mr. Maurice",male,,0,0,371060,7.75,,Q +461,1,1,"Anderson, Mr. Harry",male,48,0,0,19952,26.55,E12,S +462,0,3,"Morley, Mr. William",male,34,0,0,364506,8.05,,S +463,0,1,"Gee, Mr. Arthur H",male,47,0,0,111320,38.5,E63,S +464,0,2,"Milling, Mr. Jacob Christian",male,48,0,0,234360,13,,S +465,0,3,"Maisner, Mr. Simon",male,,0,0,A/S 2816,8.05,,S +466,0,3,"Goncalves, Mr. Manuel Estanslas",male,38,0,0,SOTON/O.Q. 3101306,7.05,,S +467,0,2,"Campbell, Mr. William",male,,0,0,239853,0,,S +468,0,1,"Smart, Mr. John Montgomery",male,56,0,0,113792,26.55,,S +469,0,3,"Scanlan, Mr. James",male,,0,0,36209,7.725,,Q +470,1,3,"Baclini, Miss. Helene Barbara",female,0.75,2,1,2666,19.2583,,C +471,0,3,"Keefe, Mr. Arthur",male,,0,0,323592,7.25,,S +472,0,3,"Cacic, Mr. Luka",male,38,0,0,315089,8.6625,,S +473,1,2,"West, Mrs. Edwy Arthur (Ada Mary Worth)",female,33,1,2,C.A. 34651,27.75,,S +474,1,2,"Jerwan, Mrs. Amin S (Marie Marthe Thuillard)",female,23,0,0,SC/AH Basle 541,13.7917,D,C +475,0,3,"Strandberg, Miss. Ida Sofia",female,22,0,0,7553,9.8375,,S +476,0,1,"Clifford, Mr. George Quincy",male,,0,0,110465,52,A14,S +477,0,2,"Renouf, Mr. Peter Henry",male,34,1,0,31027,21,,S +478,0,3,"Braund, Mr. Lewis Richard",male,29,1,0,3460,7.0458,,S +479,0,3,"Karlsson, Mr. Nils August",male,22,0,0,350060,7.5208,,S +480,1,3,"Hirvonen, Miss. Hildur E",female,2,0,1,3101298,12.2875,,S +481,0,3,"Goodwin, Master. Harold Victor",male,9,5,2,CA 2144,46.9,,S +482,0,2,"Frost, Mr. Anthony Wood ""Archie""",male,,0,0,239854,0,,S +483,0,3,"Rouse, Mr. Richard Henry",male,50,0,0,A/5 3594,8.05,,S +484,1,3,"Turkula, Mrs. (Hedwig)",female,63,0,0,4134,9.5875,,S +485,1,1,"Bishop, Mr. Dickinson H",male,25,1,0,11967,91.0792,B49,C +486,0,3,"Lefebre, Miss. 
Jeannie",female,,3,1,4133,25.4667,,S +487,1,1,"Hoyt, Mrs. Frederick Maxfield (Jane Anne Forby)",female,35,1,0,19943,90,C93,S +488,0,1,"Kent, Mr. Edward Austin",male,58,0,0,11771,29.7,B37,C +489,0,3,"Somerton, Mr. Francis William",male,30,0,0,A.5. 18509,8.05,,S +490,1,3,"Coutts, Master. Eden Leslie ""Neville""",male,9,1,1,C.A. 37671,15.9,,S +491,0,3,"Hagland, Mr. Konrad Mathias Reiersen",male,,1,0,65304,19.9667,,S +492,0,3,"Windelov, Mr. Einar",male,21,0,0,SOTON/OQ 3101317,7.25,,S +493,0,1,"Molson, Mr. Harry Markland",male,55,0,0,113787,30.5,C30,S +494,0,1,"Artagaveytia, Mr. Ramon",male,71,0,0,PC 17609,49.5042,,C +495,0,3,"Stanley, Mr. Edward Roland",male,21,0,0,A/4 45380,8.05,,S +496,0,3,"Yousseff, Mr. Gerious",male,,0,0,2627,14.4583,,C +497,1,1,"Eustis, Miss. Elizabeth Mussey",female,54,1,0,36947,78.2667,D20,C +498,0,3,"Shellard, Mr. Frederick William",male,,0,0,C.A. 6212,15.1,,S +499,0,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25,1,2,113781,151.55,C22 C26,S +500,0,3,"Svensson, Mr. Olof",male,24,0,0,350035,7.7958,,S +501,0,3,"Calic, Mr. Petar",male,17,0,0,315086,8.6625,,S +502,0,3,"Canavan, Miss. Mary",female,21,0,0,364846,7.75,,Q +503,0,3,"O'Sullivan, Miss. Bridget Mary",female,,0,0,330909,7.6292,,Q +504,0,3,"Laitinen, Miss. Kristina Sofia",female,37,0,0,4135,9.5875,,S +505,1,1,"Maioni, Miss. Roberta",female,16,0,0,110152,86.5,B79,S +506,0,1,"Penasco y Castellana, Mr. Victor de Satode",male,18,1,0,PC 17758,108.9,C65,C +507,1,2,"Quick, Mrs. Frederick Charles (Jane Richards)",female,33,0,2,26360,26,,S +508,1,1,"Bradley, Mr. George (""George Arthur Brayton"")",male,,0,0,111427,26.55,,S +509,0,3,"Olsen, Mr. Henry Margido",male,28,0,0,C 4001,22.525,,S +510,1,3,"Lang, Mr. Fang",male,26,0,0,1601,56.4958,,S +511,1,3,"Daly, Mr. Eugene Patrick",male,29,0,0,382651,7.75,,Q +512,0,3,"Webber, Mr. James",male,,0,0,SOTON/OQ 3101316,8.05,,S +513,1,1,"McGough, Mr. James Robert",male,36,0,0,PC 17473,26.2875,E25,S +514,1,1,"Rothschild, Mrs. Martin (Elizabeth L. Barrett)",female,54,1,0,PC 17603,59.4,,C +515,0,3,"Coleff, Mr. Satio",male,24,0,0,349209,7.4958,,S +516,0,1,"Walker, Mr. William Anderson",male,47,0,0,36967,34.0208,D46,S +517,1,2,"Lemore, Mrs. (Amelia Milley)",female,34,0,0,C.A. 34260,10.5,F33,S +518,0,3,"Ryan, Mr. Patrick",male,,0,0,371110,24.15,,Q +519,1,2,"Angle, Mrs. William A (Florence ""Mary"" Agnes Hughes)",female,36,1,0,226875,26,,S +520,0,3,"Pavlovic, Mr. Stefo",male,32,0,0,349242,7.8958,,S +521,1,1,"Perreault, Miss. Anne",female,30,0,0,12749,93.5,B73,S +522,0,3,"Vovk, Mr. Janko",male,22,0,0,349252,7.8958,,S +523,0,3,"Lahoud, Mr. Sarkis",male,,0,0,2624,7.225,,C +524,1,1,"Hippach, Mrs. Louis Albert (Ida Sophia Fischer)",female,44,0,1,111361,57.9792,B18,C +525,0,3,"Kassem, Mr. Fared",male,,0,0,2700,7.2292,,C +526,0,3,"Farrell, Mr. James",male,40.5,0,0,367232,7.75,,Q +527,1,2,"Ridsdale, Miss. Lucy",female,50,0,0,W./C. 14258,10.5,,S +528,0,1,"Farthing, Mr. John",male,,0,0,PC 17483,221.7792,C95,S +529,0,3,"Salonen, Mr. Johan Werner",male,39,0,0,3101296,7.925,,S +530,0,2,"Hocking, Mr. Richard George",male,23,2,1,29104,11.5,,S +531,1,2,"Quick, Miss. Phyllis May",female,2,1,1,26360,26,,S +532,0,3,"Toufik, Mr. Nakli",male,,0,0,2641,7.2292,,C +533,0,3,"Elias, Mr. Joseph Jr",male,17,1,1,2690,7.2292,,C +534,1,3,"Peter, Mrs. Catherine (Catherine Rizk)",female,,0,2,2668,22.3583,,C +535,0,3,"Cacic, Miss. Marija",female,30,0,0,315084,8.6625,,S +536,1,2,"Hart, Miss. Eva Miriam",female,7,0,2,F.C.C. 13529,26.25,,S +537,0,1,"Butt, Major. 
Archibald Willingham",male,45,0,0,113050,26.55,B38,S +538,1,1,"LeRoy, Miss. Bertha",female,30,0,0,PC 17761,106.425,,C +539,0,3,"Risien, Mr. Samuel Beard",male,,0,0,364498,14.5,,S +540,1,1,"Frolicher, Miss. Hedwig Margaritha",female,22,0,2,13568,49.5,B39,C +541,1,1,"Crosby, Miss. Harriet R",female,36,0,2,WE/P 5735,71,B22,S +542,0,3,"Andersson, Miss. Ingeborg Constanzia",female,9,4,2,347082,31.275,,S +543,0,3,"Andersson, Miss. Sigrid Elisabeth",female,11,4,2,347082,31.275,,S +544,1,2,"Beane, Mr. Edward",male,32,1,0,2908,26,,S +545,0,1,"Douglas, Mr. Walter Donald",male,50,1,0,PC 17761,106.425,C86,C +546,0,1,"Nicholson, Mr. Arthur Ernest",male,64,0,0,693,26,,S +547,1,2,"Beane, Mrs. Edward (Ethel Clarke)",female,19,1,0,2908,26,,S +548,1,2,"Padro y Manent, Mr. Julian",male,,0,0,SC/PARIS 2146,13.8625,,C +549,0,3,"Goldsmith, Mr. Frank John",male,33,1,1,363291,20.525,,S +550,1,2,"Davies, Master. John Morgan Jr",male,8,1,1,C.A. 33112,36.75,,S +551,1,1,"Thayer, Mr. John Borland Jr",male,17,0,2,17421,110.8833,C70,C +552,0,2,"Sharp, Mr. Percival James R",male,27,0,0,244358,26,,S +553,0,3,"O'Brien, Mr. Timothy",male,,0,0,330979,7.8292,,Q +554,1,3,"Leeni, Mr. Fahim (""Philip Zenni"")",male,22,0,0,2620,7.225,,C +555,1,3,"Ohman, Miss. Velin",female,22,0,0,347085,7.775,,S +556,0,1,"Wright, Mr. George",male,62,0,0,113807,26.55,,S +557,1,1,"Duff Gordon, Lady. (Lucille Christiana Sutherland) (""Mrs Morgan"")",female,48,1,0,11755,39.6,A16,C +558,0,1,"Robbins, Mr. Victor",male,,0,0,PC 17757,227.525,,C +559,1,1,"Taussig, Mrs. Emil (Tillie Mandelbaum)",female,39,1,1,110413,79.65,E67,S +560,1,3,"de Messemaeker, Mrs. Guillaume Joseph (Emma)",female,36,1,0,345572,17.4,,S +561,0,3,"Morrow, Mr. Thomas Rowan",male,,0,0,372622,7.75,,Q +562,0,3,"Sivic, Mr. Husein",male,40,0,0,349251,7.8958,,S +563,0,2,"Norman, Mr. Robert Douglas",male,28,0,0,218629,13.5,,S +564,0,3,"Simmons, Mr. John",male,,0,0,SOTON/OQ 392082,8.05,,S +565,0,3,"Meanwell, Miss. (Marion Ogden)",female,,0,0,SOTON/O.Q. 392087,8.05,,S +566,0,3,"Davies, Mr. Alfred J",male,24,2,0,A/4 48871,24.15,,S +567,0,3,"Stoytcheff, Mr. Ilia",male,19,0,0,349205,7.8958,,S +568,0,3,"Palsson, Mrs. Nils (Alma Cornelia Berglund)",female,29,0,4,349909,21.075,,S +569,0,3,"Doharr, Mr. Tannous",male,,0,0,2686,7.2292,,C +570,1,3,"Jonsson, Mr. Carl",male,32,0,0,350417,7.8542,,S +571,1,2,"Harris, Mr. George",male,62,0,0,S.W./PP 752,10.5,,S +572,1,1,"Appleton, Mrs. Edward Dale (Charlotte Lamson)",female,53,2,0,11769,51.4792,C101,S +573,1,1,"Flynn, Mr. John Irwin (""Irving"")",male,36,0,0,PC 17474,26.3875,E25,S +574,1,3,"Kelly, Miss. Mary",female,,0,0,14312,7.75,,Q +575,0,3,"Rush, Mr. Alfred George John",male,16,0,0,A/4. 20589,8.05,,S +576,0,3,"Patchett, Mr. George",male,19,0,0,358585,14.5,,S +577,1,2,"Garside, Miss. Ethel",female,34,0,0,243880,13,,S +578,1,1,"Silvey, Mrs. William Baird (Alice Munger)",female,39,1,0,13507,55.9,E44,S +579,0,3,"Caram, Mrs. Joseph (Maria Elias)",female,,1,0,2689,14.4583,,C +580,1,3,"Jussila, Mr. Eiriik",male,32,0,0,STON/O 2. 3101286,7.925,,S +581,1,2,"Christy, Miss. Julie Rachel",female,25,1,1,237789,30,,S +582,1,1,"Thayer, Mrs. John Borland (Marian Longstreth Morris)",female,39,1,1,17421,110.8833,C68,C +583,0,2,"Downton, Mr. William James",male,54,0,0,28403,26,,S +584,0,1,"Ross, Mr. John Hugo",male,36,0,0,13049,40.125,A10,C +585,0,3,"Paulner, Mr. Uscher",male,,0,0,3411,8.7125,,C +586,1,1,"Taussig, Miss. Ruth",female,18,0,2,110413,79.65,E68,S +587,0,2,"Jarvis, Mr. John Denzil",male,47,0,0,237565,15,,S +588,1,1,"Frolicher-Stehli, Mr. 
Maxmillian",male,60,1,1,13567,79.2,B41,C +589,0,3,"Gilinski, Mr. Eliezer",male,22,0,0,14973,8.05,,S +590,0,3,"Murdlin, Mr. Joseph",male,,0,0,A./5. 3235,8.05,,S +591,0,3,"Rintamaki, Mr. Matti",male,35,0,0,STON/O 2. 3101273,7.125,,S +592,1,1,"Stephenson, Mrs. Walter Bertram (Martha Eustis)",female,52,1,0,36947,78.2667,D20,C +593,0,3,"Elsbury, Mr. William James",male,47,0,0,A/5 3902,7.25,,S +594,0,3,"Bourke, Miss. Mary",female,,0,2,364848,7.75,,Q +595,0,2,"Chapman, Mr. John Henry",male,37,1,0,SC/AH 29037,26,,S +596,0,3,"Van Impe, Mr. Jean Baptiste",male,36,1,1,345773,24.15,,S +597,1,2,"Leitch, Miss. Jessie Wills",female,,0,0,248727,33,,S +598,0,3,"Johnson, Mr. Alfred",male,49,0,0,LINE,0,,S +599,0,3,"Boulos, Mr. Hanna",male,,0,0,2664,7.225,,C +600,1,1,"Duff Gordon, Sir. Cosmo Edmund (""Mr Morgan"")",male,49,1,0,PC 17485,56.9292,A20,C +601,1,2,"Jacobsohn, Mrs. Sidney Samuel (Amy Frances Christy)",female,24,2,1,243847,27,,S +602,0,3,"Slabenoff, Mr. Petco",male,,0,0,349214,7.8958,,S +603,0,1,"Harrington, Mr. Charles H",male,,0,0,113796,42.4,,S +604,0,3,"Torber, Mr. Ernst William",male,44,0,0,364511,8.05,,S +605,1,1,"Homer, Mr. Harry (""Mr E Haven"")",male,35,0,0,111426,26.55,,C +606,0,3,"Lindell, Mr. Edvard Bengtsson",male,36,1,0,349910,15.55,,S +607,0,3,"Karaic, Mr. Milan",male,30,0,0,349246,7.8958,,S +608,1,1,"Daniel, Mr. Robert Williams",male,27,0,0,113804,30.5,,S +609,1,2,"Laroche, Mrs. Joseph (Juliette Marie Louise Lafargue)",female,22,1,2,SC/Paris 2123,41.5792,,C +610,1,1,"Shutes, Miss. Elizabeth W",female,40,0,0,PC 17582,153.4625,C125,S +611,0,3,"Andersson, Mrs. Anders Johan (Alfrida Konstantia Brogren)",female,39,1,5,347082,31.275,,S +612,0,3,"Jardin, Mr. Jose Neto",male,,0,0,SOTON/O.Q. 3101305,7.05,,S +613,1,3,"Murphy, Miss. Margaret Jane",female,,1,0,367230,15.5,,Q +614,0,3,"Horgan, Mr. John",male,,0,0,370377,7.75,,Q +615,0,3,"Brocklebank, Mr. William Alfred",male,35,0,0,364512,8.05,,S +616,1,2,"Herman, Miss. Alice",female,24,1,2,220845,65,,S +617,0,3,"Danbom, Mr. Ernst Gilbert",male,34,1,1,347080,14.4,,S +618,0,3,"Lobb, Mrs. William Arthur (Cordelia K Stanlick)",female,26,1,0,A/5. 3336,16.1,,S +619,1,2,"Becker, Miss. Marion Louise",female,4,2,1,230136,39,F4,S +620,0,2,"Gavey, Mr. Lawrence",male,26,0,0,31028,10.5,,S +621,0,3,"Yasbeck, Mr. Antoni",male,27,1,0,2659,14.4542,,C +622,1,1,"Kimball, Mr. Edwin Nelson Jr",male,42,1,0,11753,52.5542,D19,S +623,1,3,"Nakid, Mr. Sahid",male,20,1,1,2653,15.7417,,C +624,0,3,"Hansen, Mr. Henry Damsgaard",male,21,0,0,350029,7.8542,,S +625,0,3,"Bowen, Mr. David John ""Dai""",male,21,0,0,54636,16.1,,S +626,0,1,"Sutton, Mr. Frederick",male,61,0,0,36963,32.3208,D50,S +627,0,2,"Kirkland, Rev. Charles Leonard",male,57,0,0,219533,12.35,,Q +628,1,1,"Longley, Miss. Gretchen Fiske",female,21,0,0,13502,77.9583,D9,S +629,0,3,"Bostandyeff, Mr. Guentcho",male,26,0,0,349224,7.8958,,S +630,0,3,"O'Connell, Mr. Patrick D",male,,0,0,334912,7.7333,,Q +631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80,0,0,27042,30,A23,S +632,0,3,"Lundahl, Mr. Johan Svensson",male,51,0,0,347743,7.0542,,S +633,1,1,"Stahelin-Maeglin, Dr. Max",male,32,0,0,13214,30.5,B50,C +634,0,1,"Parr, Mr. William Henry Marsh",male,,0,0,112052,0,,S +635,0,3,"Skoog, Miss. Mabel",female,9,3,2,347088,27.9,,S +636,1,2,"Davis, Miss. Mary",female,28,0,0,237668,13,,S +637,0,3,"Leinonen, Mr. Antti Gustaf",male,32,0,0,STON/O 2. 3101292,7.925,,S +638,0,2,"Collyer, Mr. Harvey",male,31,1,1,C.A. 31921,26.25,,S +639,0,3,"Panula, Mrs. 
Juha (Maria Emilia Ojala)",female,41,0,5,3101295,39.6875,,S +640,0,3,"Thorneycroft, Mr. Percival",male,,1,0,376564,16.1,,S +641,0,3,"Jensen, Mr. Hans Peder",male,20,0,0,350050,7.8542,,S +642,1,1,"Sagesser, Mlle. Emma",female,24,0,0,PC 17477,69.3,B35,C +643,0,3,"Skoog, Miss. Margit Elizabeth",female,2,3,2,347088,27.9,,S +644,1,3,"Foo, Mr. Choong",male,,0,0,1601,56.4958,,S +645,1,3,"Baclini, Miss. Eugenie",female,0.75,2,1,2666,19.2583,,C +646,1,1,"Harper, Mr. Henry Sleeper",male,48,1,0,PC 17572,76.7292,D33,C +647,0,3,"Cor, Mr. Liudevit",male,19,0,0,349231,7.8958,,S +648,1,1,"Simonius-Blumer, Col. Oberst Alfons",male,56,0,0,13213,35.5,A26,C +649,0,3,"Willey, Mr. Edward",male,,0,0,S.O./P.P. 751,7.55,,S +650,1,3,"Stanley, Miss. Amy Zillah Elsie",female,23,0,0,CA. 2314,7.55,,S +651,0,3,"Mitkoff, Mr. Mito",male,,0,0,349221,7.8958,,S +652,1,2,"Doling, Miss. Elsie",female,18,0,1,231919,23,,S +653,0,3,"Kalvik, Mr. Johannes Halvorsen",male,21,0,0,8475,8.4333,,S +654,1,3,"O'Leary, Miss. Hanora ""Norah""",female,,0,0,330919,7.8292,,Q +655,0,3,"Hegarty, Miss. Hanora ""Nora""",female,18,0,0,365226,6.75,,Q +656,0,2,"Hickman, Mr. Leonard Mark",male,24,2,0,S.O.C. 14879,73.5,,S +657,0,3,"Radeff, Mr. Alexander",male,,0,0,349223,7.8958,,S +658,0,3,"Bourke, Mrs. John (Catherine)",female,32,1,1,364849,15.5,,Q +659,0,2,"Eitemiller, Mr. George Floyd",male,23,0,0,29751,13,,S +660,0,1,"Newell, Mr. Arthur Webster",male,58,0,2,35273,113.275,D48,C +661,1,1,"Frauenthal, Dr. Henry William",male,50,2,0,PC 17611,133.65,,S +662,0,3,"Badt, Mr. Mohamed",male,40,0,0,2623,7.225,,C +663,0,1,"Colley, Mr. Edward Pomeroy",male,47,0,0,5727,25.5875,E58,S +664,0,3,"Coleff, Mr. Peju",male,36,0,0,349210,7.4958,,S +665,1,3,"Lindqvist, Mr. Eino William",male,20,1,0,STON/O 2. 3101285,7.925,,S +666,0,2,"Hickman, Mr. Lewis",male,32,2,0,S.O.C. 14879,73.5,,S +667,0,2,"Butler, Mr. Reginald Fenton",male,25,0,0,234686,13,,S +668,0,3,"Rommetvedt, Mr. Knud Paust",male,,0,0,312993,7.775,,S +669,0,3,"Cook, Mr. Jacob",male,43,0,0,A/5 3536,8.05,,S +670,1,1,"Taylor, Mrs. Elmer Zebley (Juliet Cummins Wright)",female,,1,0,19996,52,C126,S +671,1,2,"Brown, Mrs. Thomas William Solomon (Elizabeth Catherine Ford)",female,40,1,1,29750,39,,S +672,0,1,"Davidson, Mr. Thornton",male,31,1,0,F.C. 12750,52,B71,S +673,0,2,"Mitchell, Mr. Henry Michael",male,70,0,0,C.A. 24580,10.5,,S +674,1,2,"Wilhelms, Mr. Charles",male,31,0,0,244270,13,,S +675,0,2,"Watson, Mr. Ennis Hastings",male,,0,0,239856,0,,S +676,0,3,"Edvardsson, Mr. Gustaf Hjalmar",male,18,0,0,349912,7.775,,S +677,0,3,"Sawyer, Mr. Frederick Charles",male,24.5,0,0,342826,8.05,,S +678,1,3,"Turja, Miss. Anna Sofia",female,18,0,0,4138,9.8417,,S +679,0,3,"Goodwin, Mrs. Frederick (Augusta Tyler)",female,43,1,6,CA 2144,46.9,,S +680,1,1,"Cardeza, Mr. Thomas Drake Martinez",male,36,0,1,PC 17755,512.3292,B51 B53 B55,C +681,0,3,"Peters, Miss. Katie",female,,0,0,330935,8.1375,,Q +682,1,1,"Hassab, Mr. Hammad",male,27,0,0,PC 17572,76.7292,D49,C +683,0,3,"Olsvigen, Mr. Thor Anderson",male,20,0,0,6563,9.225,,S +684,0,3,"Goodwin, Mr. Charles Edward",male,14,5,2,CA 2144,46.9,,S +685,0,2,"Brown, Mr. Thomas William Solomon",male,60,1,1,29750,39,,S +686,0,2,"Laroche, Mr. Joseph Philippe Lemercier",male,25,1,2,SC/Paris 2123,41.5792,,C +687,0,3,"Panula, Mr. Jaako Arnold",male,14,4,1,3101295,39.6875,,S +688,0,3,"Dakic, Mr. Branko",male,19,0,0,349228,10.1708,,S +689,0,3,"Fischer, Mr. Eberhard Thelander",male,18,0,0,350036,7.7958,,S +690,1,1,"Madill, Miss. Georgette Alexandra",female,15,0,1,24160,211.3375,B5,S +691,1,1,"Dick, Mr. 
Albert Adrian",male,31,1,0,17474,57,B20,S +692,1,3,"Karun, Miss. Manca",female,4,0,1,349256,13.4167,,C +693,1,3,"Lam, Mr. Ali",male,,0,0,1601,56.4958,,S +694,0,3,"Saad, Mr. Khalil",male,25,0,0,2672,7.225,,C +695,0,1,"Weir, Col. John",male,60,0,0,113800,26.55,,S +696,0,2,"Chapman, Mr. Charles Henry",male,52,0,0,248731,13.5,,S +697,0,3,"Kelly, Mr. James",male,44,0,0,363592,8.05,,S +698,1,3,"Mullens, Miss. Katherine ""Katie""",female,,0,0,35852,7.7333,,Q +699,0,1,"Thayer, Mr. John Borland",male,49,1,1,17421,110.8833,C68,C +700,0,3,"Humblen, Mr. Adolf Mathias Nicolai Olsen",male,42,0,0,348121,7.65,F G63,S +701,1,1,"Astor, Mrs. John Jacob (Madeleine Talmadge Force)",female,18,1,0,PC 17757,227.525,C62 C64,C +702,1,1,"Silverthorne, Mr. Spencer Victor",male,35,0,0,PC 17475,26.2875,E24,S +703,0,3,"Barbara, Miss. Saiide",female,18,0,1,2691,14.4542,,C +704,0,3,"Gallagher, Mr. Martin",male,25,0,0,36864,7.7417,,Q +705,0,3,"Hansen, Mr. Henrik Juul",male,26,1,0,350025,7.8542,,S +706,0,2,"Morley, Mr. Henry Samuel (""Mr Henry Marshall"")",male,39,0,0,250655,26,,S +707,1,2,"Kelly, Mrs. Florence ""Fannie""",female,45,0,0,223596,13.5,,S +708,1,1,"Calderhead, Mr. Edward Pennington",male,42,0,0,PC 17476,26.2875,E24,S +709,1,1,"Cleaver, Miss. Alice",female,22,0,0,113781,151.55,,S +710,1,3,"Moubarek, Master. Halim Gonios (""William George"")",male,,1,1,2661,15.2458,,C +711,1,1,"Mayne, Mlle. Berthe Antonine (""Mrs de Villiers"")",female,24,0,0,PC 17482,49.5042,C90,C +712,0,1,"Klaber, Mr. Herman",male,,0,0,113028,26.55,C124,S +713,1,1,"Taylor, Mr. Elmer Zebley",male,48,1,0,19996,52,C126,S +714,0,3,"Larsson, Mr. August Viktor",male,29,0,0,7545,9.4833,,S +715,0,2,"Greenberg, Mr. Samuel",male,52,0,0,250647,13,,S +716,0,3,"Soholt, Mr. Peter Andreas Lauritz Andersen",male,19,0,0,348124,7.65,F G73,S +717,1,1,"Endres, Miss. Caroline Louise",female,38,0,0,PC 17757,227.525,C45,C +718,1,2,"Troutt, Miss. Edwina Celia ""Winnie""",female,27,0,0,34218,10.5,E101,S +719,0,3,"McEvoy, Mr. Michael",male,,0,0,36568,15.5,,Q +720,0,3,"Johnson, Mr. Malkolm Joackim",male,33,0,0,347062,7.775,,S +721,1,2,"Harper, Miss. Annie Jessie ""Nina""",female,6,0,1,248727,33,,S +722,0,3,"Jensen, Mr. Svend Lauritz",male,17,1,0,350048,7.0542,,S +723,0,2,"Gillespie, Mr. William Henry",male,34,0,0,12233,13,,S +724,0,2,"Hodges, Mr. Henry Price",male,50,0,0,250643,13,,S +725,1,1,"Chambers, Mr. Norman Campbell",male,27,1,0,113806,53.1,E8,S +726,0,3,"Oreskovic, Mr. Luka",male,20,0,0,315094,8.6625,,S +727,1,2,"Renouf, Mrs. Peter Henry (Lillian Jefferys)",female,30,3,0,31027,21,,S +728,1,3,"Mannion, Miss. Margareth",female,,0,0,36866,7.7375,,Q +729,0,2,"Bryhl, Mr. Kurt Arnold Gottfrid",male,25,1,0,236853,26,,S +730,0,3,"Ilmakangas, Miss. Pieta Sofia",female,25,1,0,STON/O2. 3101271,7.925,,S +731,1,1,"Allen, Miss. Elisabeth Walton",female,29,0,0,24160,211.3375,B5,S +732,0,3,"Hassan, Mr. Houssein G N",male,11,0,0,2699,18.7875,,C +733,0,2,"Knight, Mr. Robert J",male,,0,0,239855,0,,S +734,0,2,"Berriman, Mr. William John",male,23,0,0,28425,13,,S +735,0,2,"Troupiansky, Mr. Moses Aaron",male,23,0,0,233639,13,,S +736,0,3,"Williams, Mr. Leslie",male,28.5,0,0,54636,16.1,,S +737,0,3,"Ford, Mrs. Edward (Margaret Ann Watson)",female,48,1,3,W./C. 6608,34.375,,S +738,1,1,"Lesurer, Mr. Gustave J",male,35,0,0,PC 17755,512.3292,B101,C +739,0,3,"Ivanoff, Mr. Kanio",male,,0,0,349201,7.8958,,S +740,0,3,"Nankoff, Mr. Minko",male,,0,0,349218,7.8958,,S +741,1,1,"Hawksford, Mr. Walter James",male,,0,0,16988,30,D45,S +742,0,1,"Cavendish, Mr. 
Tyrell William",male,36,1,0,19877,78.85,C46,S +743,1,1,"Ryerson, Miss. Susan Parker ""Suzette""",female,21,2,2,PC 17608,262.375,B57 B59 B63 B66,C +744,0,3,"McNamee, Mr. Neal",male,24,1,0,376566,16.1,,S +745,1,3,"Stranden, Mr. Juho",male,31,0,0,STON/O 2. 3101288,7.925,,S +746,0,1,"Crosby, Capt. Edward Gifford",male,70,1,1,WE/P 5735,71,B22,S +747,0,3,"Abbott, Mr. Rossmore Edward",male,16,1,1,C.A. 2673,20.25,,S +748,1,2,"Sinkkonen, Miss. Anna",female,30,0,0,250648,13,,S +749,0,1,"Marvin, Mr. Daniel Warner",male,19,1,0,113773,53.1,D30,S +750,0,3,"Connaghton, Mr. Michael",male,31,0,0,335097,7.75,,Q +751,1,2,"Wells, Miss. Joan",female,4,1,1,29103,23,,S +752,1,3,"Moor, Master. Meier",male,6,0,1,392096,12.475,E121,S +753,0,3,"Vande Velde, Mr. Johannes Joseph",male,33,0,0,345780,9.5,,S +754,0,3,"Jonkoff, Mr. Lalio",male,23,0,0,349204,7.8958,,S +755,1,2,"Herman, Mrs. Samuel (Jane Laver)",female,48,1,2,220845,65,,S +756,1,2,"Hamalainen, Master. Viljo",male,0.67,1,1,250649,14.5,,S +757,0,3,"Carlsson, Mr. August Sigfrid",male,28,0,0,350042,7.7958,,S +758,0,2,"Bailey, Mr. Percy Andrew",male,18,0,0,29108,11.5,,S +759,0,3,"Theobald, Mr. Thomas Leonard",male,34,0,0,363294,8.05,,S +760,1,1,"Rothes, the Countess. of (Lucy Noel Martha Dyer-Edwards)",female,33,0,0,110152,86.5,B77,S +761,0,3,"Garfirth, Mr. John",male,,0,0,358585,14.5,,S +762,0,3,"Nirva, Mr. Iisakki Antino Aijo",male,41,0,0,SOTON/O2 3101272,7.125,,S +763,1,3,"Barah, Mr. Hanna Assi",male,20,0,0,2663,7.2292,,C +764,1,1,"Carter, Mrs. William Ernest (Lucile Polk)",female,36,1,2,113760,120,B96 B98,S +765,0,3,"Eklund, Mr. Hans Linus",male,16,0,0,347074,7.775,,S +766,1,1,"Hogeboom, Mrs. John C (Anna Andrews)",female,51,1,0,13502,77.9583,D11,S +767,0,1,"Brewe, Dr. Arthur Jackson",male,,0,0,112379,39.6,,C +768,0,3,"Mangan, Miss. Mary",female,30.5,0,0,364850,7.75,,Q +769,0,3,"Moran, Mr. Daniel J",male,,1,0,371110,24.15,,Q +770,0,3,"Gronnestad, Mr. Daniel Danielsen",male,32,0,0,8471,8.3625,,S +771,0,3,"Lievens, Mr. Rene Aime",male,24,0,0,345781,9.5,,S +772,0,3,"Jensen, Mr. Niels Peder",male,48,0,0,350047,7.8542,,S +773,0,2,"Mack, Mrs. (Mary)",female,57,0,0,S.O./P.P. 3,10.5,E77,S +774,0,3,"Elias, Mr. Dibo",male,,0,0,2674,7.225,,C +775,1,2,"Hocking, Mrs. Elizabeth (Eliza Needs)",female,54,1,3,29105,23,,S +776,0,3,"Myhrman, Mr. Pehr Fabian Oliver Malkolm",male,18,0,0,347078,7.75,,S +777,0,3,"Tobin, Mr. Roger",male,,0,0,383121,7.75,F38,Q +778,1,3,"Emanuel, Miss. Virginia Ethel",female,5,0,0,364516,12.475,,S +779,0,3,"Kilgannon, Mr. Thomas J",male,,0,0,36865,7.7375,,Q +780,1,1,"Robert, Mrs. Edward Scott (Elisabeth Walton McMillan)",female,43,0,1,24160,211.3375,B3,S +781,1,3,"Ayoub, Miss. Banoura",female,13,0,0,2687,7.2292,,C +782,1,1,"Dick, Mrs. Albert Adrian (Vera Gillespie)",female,17,1,0,17474,57,B20,S +783,0,1,"Long, Mr. Milton Clyde",male,29,0,0,113501,30,D6,S +784,0,3,"Johnston, Mr. Andrew G",male,,1,2,W./C. 6607,23.45,,S +785,0,3,"Ali, Mr. William",male,25,0,0,SOTON/O.Q. 3101312,7.05,,S +786,0,3,"Harmer, Mr. Abraham (David Lishin)",male,25,0,0,374887,7.25,,S +787,1,3,"Sjoblom, Miss. Anna Sofia",female,18,0,0,3101265,7.4958,,S +788,0,3,"Rice, Master. George Hugh",male,8,4,1,382652,29.125,,Q +789,1,3,"Dean, Master. Bertram Vere",male,1,1,2,C.A. 2315,20.575,,S +790,0,1,"Guggenheim, Mr. Benjamin",male,46,0,0,PC 17593,79.2,B82 B84,C +791,0,3,"Keane, Mr. Andrew ""Andy""",male,,0,0,12460,7.75,,Q +792,0,2,"Gaskell, Mr. Alfred",male,16,0,0,239865,26,,S +793,0,3,"Sage, Miss. Stella Anna",female,,8,2,CA. 2343,69.55,,S +794,0,1,"Hoyt, Mr. 
William Fisher",male,,0,0,PC 17600,30.6958,,C +795,0,3,"Dantcheff, Mr. Ristiu",male,25,0,0,349203,7.8958,,S +796,0,2,"Otter, Mr. Richard",male,39,0,0,28213,13,,S +797,1,1,"Leader, Dr. Alice (Farnham)",female,49,0,0,17465,25.9292,D17,S +798,1,3,"Osman, Mrs. Mara",female,31,0,0,349244,8.6833,,S +799,0,3,"Ibrahim Shawah, Mr. Yousseff",male,30,0,0,2685,7.2292,,C +800,0,3,"Van Impe, Mrs. Jean Baptiste (Rosalie Paula Govaert)",female,30,1,1,345773,24.15,,S +801,0,2,"Ponesell, Mr. Martin",male,34,0,0,250647,13,,S +802,1,2,"Collyer, Mrs. Harvey (Charlotte Annie Tate)",female,31,1,1,C.A. 31921,26.25,,S +803,1,1,"Carter, Master. William Thornton II",male,11,1,2,113760,120,B96 B98,S +804,1,3,"Thomas, Master. Assad Alexander",male,0.42,0,1,2625,8.5167,,C +805,1,3,"Hedman, Mr. Oskar Arvid",male,27,0,0,347089,6.975,,S +806,0,3,"Johansson, Mr. Karl Johan",male,31,0,0,347063,7.775,,S +807,0,1,"Andrews, Mr. Thomas Jr",male,39,0,0,112050,0,A36,S +808,0,3,"Pettersson, Miss. Ellen Natalia",female,18,0,0,347087,7.775,,S +809,0,2,"Meyer, Mr. August",male,39,0,0,248723,13,,S +810,1,1,"Chambers, Mrs. Norman Campbell (Bertha Griggs)",female,33,1,0,113806,53.1,E8,S +811,0,3,"Alexander, Mr. William",male,26,0,0,3474,7.8875,,S +812,0,3,"Lester, Mr. James",male,39,0,0,A/4 48871,24.15,,S +813,0,2,"Slemen, Mr. Richard James",male,35,0,0,28206,10.5,,S +814,0,3,"Andersson, Miss. Ebba Iris Alfrida",female,6,4,2,347082,31.275,,S +815,0,3,"Tomlin, Mr. Ernest Portage",male,30.5,0,0,364499,8.05,,S +816,0,1,"Fry, Mr. Richard",male,,0,0,112058,0,B102,S +817,0,3,"Heininen, Miss. Wendla Maria",female,23,0,0,STON/O2. 3101290,7.925,,S +818,0,2,"Mallet, Mr. Albert",male,31,1,1,S.C./PARIS 2079,37.0042,,C +819,0,3,"Holm, Mr. John Fredrik Alexander",male,43,0,0,C 7075,6.45,,S +820,0,3,"Skoog, Master. Karl Thorsten",male,10,3,2,347088,27.9,,S +821,1,1,"Hays, Mrs. Charles Melville (Clara Jennings Gregg)",female,52,1,1,12749,93.5,B69,S +822,1,3,"Lulic, Mr. Nikola",male,27,0,0,315098,8.6625,,S +823,0,1,"Reuchlin, Jonkheer. John George",male,38,0,0,19972,0,,S +824,1,3,"Moor, Mrs. (Beila)",female,27,0,1,392096,12.475,E121,S +825,0,3,"Panula, Master. Urho Abraham",male,2,4,1,3101295,39.6875,,S +826,0,3,"Flynn, Mr. John",male,,0,0,368323,6.95,,Q +827,0,3,"Lam, Mr. Len",male,,0,0,1601,56.4958,,S +828,1,2,"Mallet, Master. Andre",male,1,0,2,S.C./PARIS 2079,37.0042,,C +829,1,3,"McCormack, Mr. Thomas Joseph",male,,0,0,367228,7.75,,Q +830,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,62,0,0,113572,80,B28, +831,1,3,"Yasbeck, Mrs. Antoni (Selini Alexander)",female,15,1,0,2659,14.4542,,C +832,1,2,"Richards, Master. George Sibley",male,0.83,1,1,29106,18.75,,S +833,0,3,"Saad, Mr. Amin",male,,0,0,2671,7.2292,,C +834,0,3,"Augustsson, Mr. Albert",male,23,0,0,347468,7.8542,,S +835,0,3,"Allum, Mr. Owen George",male,18,0,0,2223,8.3,,S +836,1,1,"Compton, Miss. Sara Rebecca",female,39,1,1,PC 17756,83.1583,E49,C +837,0,3,"Pasic, Mr. Jakob",male,21,0,0,315097,8.6625,,S +838,0,3,"Sirota, Mr. Maurice",male,,0,0,392092,8.05,,S +839,1,3,"Chip, Mr. Chang",male,32,0,0,1601,56.4958,,S +840,1,1,"Marechal, Mr. Pierre",male,,0,0,11774,29.7,C47,C +841,0,3,"Alhomaki, Mr. Ilmari Rudolf",male,20,0,0,SOTON/O2 3101287,7.925,,S +842,0,2,"Mudd, Mr. Thomas Charles",male,16,0,0,S.O./P.P. 3,10.5,,S +843,1,1,"Serepeca, Miss. Augusta",female,30,0,0,113798,31,,C +844,0,3,"Lemberopolous, Mr. Peter L",male,34.5,0,0,2683,6.4375,,C +845,0,3,"Culumovic, Mr. Jeso",male,17,0,0,315090,8.6625,,S +846,0,3,"Abbing, Mr. Anthony",male,42,0,0,C.A. 5547,7.55,,S +847,0,3,"Sage, Mr. 
Douglas Bullen",male,,8,2,CA. 2343,69.55,,S +848,0,3,"Markoff, Mr. Marin",male,35,0,0,349213,7.8958,,C +849,0,2,"Harper, Rev. John",male,28,0,1,248727,33,,S +850,1,1,"Goldenberg, Mrs. Samuel L (Edwiga Grabowska)",female,,1,0,17453,89.1042,C92,C +851,0,3,"Andersson, Master. Sigvard Harald Elias",male,4,4,2,347082,31.275,,S +852,0,3,"Svensson, Mr. Johan",male,74,0,0,347060,7.775,,S +853,0,3,"Boulos, Miss. Nourelain",female,9,1,1,2678,15.2458,,C +854,1,1,"Lines, Miss. Mary Conover",female,16,0,1,PC 17592,39.4,D28,S +855,0,2,"Carter, Mrs. Ernest Courtenay (Lilian Hughes)",female,44,1,0,244252,26,,S +856,1,3,"Aks, Mrs. Sam (Leah Rosen)",female,18,0,1,392091,9.35,,S +857,1,1,"Wick, Mrs. George Dennick (Mary Hitchcock)",female,45,1,1,36928,164.8667,,S +858,1,1,"Daly, Mr. Peter Denis ",male,51,0,0,113055,26.55,E17,S +859,1,3,"Baclini, Mrs. Solomon (Latifa Qurban)",female,24,0,3,2666,19.2583,,C +860,0,3,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,,C +861,0,3,"Hansen, Mr. Claus Peter",male,41,2,0,350026,14.1083,,S +862,0,2,"Giles, Mr. Frederick Edward",male,21,1,0,28134,11.5,,S +863,1,1,"Swift, Mrs. Frederick Joel (Margaret Welles Barron)",female,48,0,0,17466,25.9292,D17,S +864,0,3,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.55,,S +865,0,2,"Gill, Mr. John William",male,24,0,0,233866,13,,S +866,1,2,"Bystrom, Mrs. (Karolina)",female,42,0,0,236852,13,,S +867,1,2,"Duran y More, Miss. Asuncion",female,27,1,0,SC/PARIS 2149,13.8583,,C +868,0,1,"Roebling, Mr. Washington Augustus II",male,31,0,0,PC 17590,50.4958,A24,S +869,0,3,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5,,S +870,1,3,"Johnson, Master. Harold Theodor",male,4,1,1,347742,11.1333,,S +871,0,3,"Balkic, Mr. Cerin",male,26,0,0,349248,7.8958,,S +872,1,1,"Beckwith, Mrs. Richard Leonard (Sallie Monypeny)",female,47,1,1,11751,52.5542,D35,S +873,0,1,"Carlsson, Mr. Frans Olof",male,33,0,0,695,5,B51 B53 B55,S +874,0,3,"Vander Cruyssen, Mr. Victor",male,47,0,0,345765,9,,S +875,1,2,"Abelson, Mrs. Samuel (Hannah Wizosky)",female,28,1,0,P/PP 3381,24,,C +876,1,3,"Najib, Miss. Adele Kiamie ""Jane""",female,15,0,0,2667,7.225,,C +877,0,3,"Gustafsson, Mr. Alfred Ossian",male,20,0,0,7534,9.8458,,S +878,0,3,"Petroff, Mr. Nedelio",male,19,0,0,349212,7.8958,,S +879,0,3,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,,S +880,1,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",female,56,0,1,11767,83.1583,C50,C +881,1,2,"Shelley, Mrs. William (Imanita Parrish Hall)",female,25,0,1,230433,26,,S +882,0,3,"Markun, Mr. Johann",male,33,0,0,349257,7.8958,,S +883,0,3,"Dahlberg, Miss. Gerda Ulrika",female,22,0,0,7552,10.5167,,S +884,0,2,"Banfield, Mr. Frederick James",male,28,0,0,C.A./SOTON 34068,10.5,,S +885,0,3,"Sutehall, Mr. Henry Jr",male,25,0,0,SOTON/OQ 392076,7.05,,S +886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39,0,5,382652,29.125,,Q +887,0,2,"Montvila, Rev. Juozas",male,27,0,0,211536,13,,S +888,1,1,"Graham, Miss. Margaret Edith",female,19,0,0,112053,30,B42,S +889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S +890,1,1,"Behr, Mr. Karl Howell",male,26,0,0,111369,30,C148,C +891,0,3,"Dooley, Mr. 
Patrick",male,32,0,0,370376,7.75,,Q diff --git a/doc/source/_static/css/getting_started.css b/doc/source/_static/css/getting_started.css new file mode 100644 index 0000000000000..bb24761cdb159 --- /dev/null +++ b/doc/source/_static/css/getting_started.css @@ -0,0 +1,251 @@ +/* Getting started pages */ + +/* data intro */ +.gs-data { + font-size: 0.9rem; +} + +.gs-data-title { + align-items: center; + font-size: 0.9rem; +} + +.gs-data-title .badge { + margin: 10px; + padding: 5px; +} + +.gs-data .badge { + cursor: pointer; + padding: 10px; + border: none; + text-align: left; + outline: none; + font-size: 12px; +} + +.gs-data .btn { + background-color: grey; + border: none; +} + +/* note/alert properties */ + +.alert-heading { + font-size: 1.2rem; +} + +/* callout properties */ +.gs-callout { + padding: 20px; + margin: 20px 0; + border: 1px solid #eee; + border-left-width: 5px; + border-radius: 3px; +} +.gs-callout h4 { + margin-top: 0; + margin-bottom: 5px; +} +.gs-callout p:last-child { + margin-bottom: 0; +} +.gs-callout code { + border-radius: 3px; +} +.gs-callout+.gs-callout { + margin-top: -5px; +} +.gs-callout-remember { + border-left-color: #f0ad4e; + align-items: center; + font-size: 1.2rem; +} +.gs-callout-remember h4 { + color: #f0ad4e; +} + +/* reference to user guide */ +.gs-torefguide { + align-items: center; + font-size: 0.9rem; +} + +.gs-torefguide .badge { + background-color: #130654; + margin: 10px 10px 10px 0px; + padding: 5px; +} + +.gs-torefguide a { + margin-left: 5px; + color: #130654; + border-bottom: 1px solid #FFCA00f3; + box-shadow: 0px -10px 0px #FFCA00f3 inset; +} + +.gs-torefguide p { + margin-top: 1rem; +} + +.gs-torefguide a:hover { + margin-left: 5px; + color: grey; + text-decoration: none; + border-bottom: 1px solid #b2ff80f3; + box-shadow: 0px -10px 0px #b2ff80f3 inset; +} + +/* question-task environment */ + +ul.task-bullet, ol.custom-bullet{ + list-style:none; + padding-left: 0; + margin-top: 2em; +} + +ul.task-bullet > li:before { + content:""; + height:2em; + width:2em; + display:block; + float:left; + margin-left:-2em; + background-position:center; + background-repeat:no-repeat; + background-color: #130654; + border-radius: 50%; + background-size:100%; + background-image:url('../question_mark_noback.svg'); + } + +ul.task-bullet > li { + border-left: 1px solid #130654; + padding-left:1em; +} + +ul.task-bullet > li > p:first-child { + font-size: 1.1rem; + padding-left: 0.75rem; +} + +/* Getting started index page */ + +.intro-card { + background:#FFF; + border-radius:0; + padding: 30px 10px 10px 10px; + margin: 10px 0px; +} + +.intro-card .card-text { + margin:20px 0px; + /*min-height: 150px; */ +} + +.intro-card .card-img-top { + margin: 10px; +} + +.install-block { + padding-bottom: 30px; +} + +.install-card .card-header { + border: none; + background-color:white; + color: #150458; + font-size: 1.1rem; + font-weight: bold; + padding: 1rem 1rem 0rem 1rem; +} + +.install-card .card-footer { + border: none; + background-color:white; +} + +.install-card pre { + margin: 0 1em 1em 1em; +} + +.custom-button { + background-color:#DCDCDC; + border: none; + color: #484848; + text-align: center; + text-decoration: none; + display: inline-block; + font-size: 0.9rem; + border-radius: 0.5rem; + max-width: 120px; + padding: 0.5rem 0rem; +} + +.custom-button a { + color: #484848; +} + +.custom-button p { + margin-top: 0; + margin-bottom: 0rem; + color: #484848; +} + +/* intro to tutorial collapsed cards */ + +.tutorial-accordion { + margin-top: 20px; + margin-bottom: 
20px; +} + +.tutorial-card .card-header.card-link .btn { + margin-right: 12px; +} + +.tutorial-card .card-header.card-link .btn:after { + content: "-"; +} + +.tutorial-card .card-header.card-link.collapsed .btn:after { + content: "+"; +} + +.tutorial-card-header-1 { + justify-content: space-between; + align-items: center; +} + +.tutorial-card-header-2 { + justify-content: flex-start; + align-items: center; + font-size: 1.3rem; +} + +.tutorial-card .card-header { + cursor: pointer; + background-color: white; +} + +.tutorial-card .card-body { + background-color: #F0F0F0; +} + +.tutorial-card .badge { + background-color: #130654; + margin: 10px 10px 10px 10px; + padding: 5px; +} + +.tutorial-card .gs-badge-link p { + margin: 0px; +} + +.tutorial-card .gs-badge-link a { + color: white; + text-decoration: none; +} + +.tutorial-card .badge:hover { + background-color: grey; +} diff --git a/doc/source/_static/logo_r.svg b/doc/source/_static/logo_r.svg new file mode 100644 index 0000000000000..389b03c113e0f --- /dev/null +++ b/doc/source/_static/logo_r.svg @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + diff --git a/doc/source/_static/logo_sas.svg b/doc/source/_static/logo_sas.svg new file mode 100644 index 0000000000000..d14fa105d49d6 --- /dev/null +++ b/doc/source/_static/logo_sas.svg @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/doc/source/_static/logo_sql.svg b/doc/source/_static/logo_sql.svg new file mode 100644 index 0000000000000..4a5b7d0b1b943 --- /dev/null +++ b/doc/source/_static/logo_sql.svg @@ -0,0 +1,73 @@ + + + + + + + + image/svg+xml + + + + + + + + + SQL + diff --git a/doc/source/_static/logo_stata.svg b/doc/source/_static/logo_stata.svg new file mode 100644 index 0000000000000..a6e3f1d221959 --- /dev/null +++ b/doc/source/_static/logo_stata.svg @@ -0,0 +1,17 @@ + + + + + stata-logo-blue + + + + + + + + \ No newline at end of file diff --git a/doc/source/_static/schemas/01_table_dataframe.svg b/doc/source/_static/schemas/01_table_dataframe.svg new file mode 100644 index 0000000000000..9bd1c217b3ca2 --- /dev/null +++ b/doc/source/_static/schemas/01_table_dataframe.svg @@ -0,0 +1,262 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + column + DataFrame + + + row + + + diff --git a/doc/source/_static/schemas/01_table_series.svg b/doc/source/_static/schemas/01_table_series.svg new file mode 100644 index 0000000000000..d52c882f26868 --- /dev/null +++ b/doc/source/_static/schemas/01_table_series.svg @@ -0,0 +1,127 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + Series + + + diff --git a/doc/source/_static/schemas/01_table_spreadsheet.png b/doc/source/_static/schemas/01_table_spreadsheet.png new file mode 100644 index 0000000000000000000000000000000000000000..b3cf5a0245b9cc955106b95bb718c039fa1dac74 GIT binary patch literal 46286 zcmXuK19T8{4*R+t_$Fw$1l`zQ6xl=bo7}-RE}C zoUX2V>Z$5*1vzm9SR7aY01zZ4M3evk+z0@`%%QQ2hGCQh#U4#t48nX{9Vv4dg2EDQh;0g@tuDsF3MSMw(myHd4xoP$-IHtKK-+uKZB<*q6lG;$8&}^sRBPJ zIZbw+7+vhG9s&!Mb~41L>S529dZfb|f(fXpJY3D!A6kXHACE3QLOs`T>d=d7W;hg{6J^Ty6$P-$fi8DQb z<}&r?T1K~}1B2~f3*lRB>|yeoOI2?9kXUrO=h~^Gf?AD9CigzyZV&9$`I@cp%te8? 
zvkiUh`qjiW`V<|G#e((L+l5PV3Vw zcmBmo8_(u1wJeyGKM=TaHsyvFmxSddBNx`?MOPP8NQi{9ranLWqd8oZ3BAlYnzq&T zje{aA<(5cLgh>VB3Dcu?s%vfd?rgq4CX&R+d;$8MgjlweX2$g@is_$fWx1C=j%G)& zMhDc>_FD6`Q+ynFZ#v{th5?aRyJuhaA;~Xh#f@C+Uyg`EMwxoe4;FkeM9{&Q+TGUQ ziZeWJt14ksZg6=F?@W^fR{l)$CQ7us(EYC7{efUG5`#}&6N}3(=Ll0)R<_=3jRqSG z^zJ*1#1dYfoRo`!|FuJJNV}G&kQ7_ICOHU{URvgH^0B$nVlNihDk=uXK!;j^6`mN5zaZAt=H%k z_5`9RPcjHI0vIlEthVe1LWC4i3V%uat+?dx9fjn)E9@Upy z!2wpJi_M0tl7i?of5hU_(dhNAm$$F=G3dk|VA}|AEe8IiEEiALxHY#$+8w#6eear@ z>&|$~x99QSFAM#S?3|GW?u3S`k-&(gZEe-<)YQI$_2PNby%b;p9}lyc=kvMEV0@{f z4|={u#)^QS9sZir>c@AOz;#Ws70Cav)eqM{=%qS=YN;X(Ug%=)KH=( z_oKw9AZVY$Ao5ybW$*#-xxDO`{S4DLk`3u}k%y$aIq=^_ZX+`OrDGIG z0A}PKjE4*X_{Aqh%C`1z95amq8lt*sZV@LBo@ZZ3{ z9rRy+zk&w#lgLzwS9T_(aE^+`z8Pnx$}lxLU(#>={lj-PIJT)ltO)$ar+nEl|X*V!u1Uj(ho083PvRJ!y!=#pN-<5 zBMBm0`U$GBS3mFNFn)gA%rpS-vAwn#Fh-Ia!@(oUlnYldx1nws&CrirDQ|c+<2kLe zaPmdJ;i06k8wpgzBd8~s?^KdYx9EpnK@`cf0VM|s-3wxs{A-vtt>H7 zyWFBN?aA(D<(T4II}dM+&)?d#cb-X|OoZRD0Ss`8N#J)*6w#Q8Nl8^&Epp}zV7ZNr zPVX=GWMpKGb%oFJg2~24iHaA=+Fy8=i+2?X}k>uzkL-$eQ&jzL@5R`c86PiP30~3dR8J)KfOxB#CcW@7&*hD4si;_Kgq;rN5gicDBMK>x^r6B?Qd_;uv4 z3ZR2JU67!SheQ=107~P6SR&`9n|mk#fDi?v6rH}+s(H>aC=e+t+M_54(En@?1v6v> z{PHF0^{GgnpS=Q@P1HSGadj9d%b1l@i$XJCz!Nj|6r?Pg2w?*90fFC%#h~r}frXV6 zmk48Wd|=_kT2q+3zxQS-@otfc%iDYGVwMs zIXQWuL|(7Qr%Qk+H=1mUA3-rLBUbqZe#K=j`||URXs6BPcyCOIl7wIU{FNY4h@_9| z7w)>ceViB5FKlFppT0mmDHw9~ZKjJZ#EtW~kZ%iDAbwsWr*l1yZ7_;uW8h8DwFTmH z^duDI+Wo)#;FIXNJxvu@#;(WO8lSKqtvGiDqee_E{pMd|eia~*OB&RRzGSS6F8iz% zwegzdEJC{{OTnl!d$O8RMBCcQDMktA%M^>4jGoKGuC2C3S0p^_-b z9+pOwP{g>e#hB?IESleS%CWX%JU_-}dlu~Lav04O;LdQzAh5!I=$Bo(3Vkadc(F6O zXl`G6658DHq@aJ2y*w;Foc(xBJFj3+<3T4Ul$ai`Cdz`}e4WllQ=yz|TCR$zcOfoU z_+{A{)|~uFwTKAzF8gP7)!5Y3cB@n4SgIPNPvePHE-o$>K0>(C={+-=5vZ|M>q;99 z(lj$BJCSg&Rfu_f52KEt(lAykv zjl*<{c>oH;^M)tm%?YLL+e>IUQga>8WO-0nFGAMw?gVpDF z{@zh3+edXXxi%C%?YPJ1&PQ~He0|e4TTHo|z1ikNzHXLQ>pITq@hF75P<(-EbULMj zK}+T2xPXn#8&KKJ8ML7{6;j-0Xgv;Wtjb4VSm&7S}z0qr^8VLP(l z8@B77YI}&@T#*$0=Z5nd=Z9%t#30?5*D6ocmcr9B%YNI0$OzM(d+hq#FLQ~^W0O6I zTM`Rz>wOU=G~A)V;!djb-ecBd?B&cxVrSi_We88Db)@?nJ1Cn4`5jK-F&BJyDYt}@ z0Q2s_mgPy%=Fc^jF0zUHz! 
ze%{X@LhsKoMXPnm=xC`NiVl>FuJb{3?leOtviXY3M_K?o$x!}j!&-~QDro=zvc z%uS}#YPMQ+xmhKVeNajnnT95CtXAp>yB=&(q&wtSpuFXE3CU4jd~xLffS)cNg_l>m zLy2z(jed%ztiKsP9Xn?VE4GigXChu51HLbJz9&vZP{3(5={>i{$fRtip1r>x!>-xA z#m*Lp$GYDhTt9p5Q{~L$u-od!67Z@(RU1yDHe5cgPye3IxlrmysW@-tlD(VrXw>+; z?-1uS%=K|c9kAV7Gx2B#L`*h*YgxazILsRbU@+ONli7`wX}R-$UCjr8dbJfB!>nXT zh}&gud*@FyGKt@$!8_kt5C@x23HK)#c?|JGtazEXFf0XV%?3g-ucH3EudE|Qxhz|C zXUK5Zn>i|8dZ)aw8-5`kb<-OB0`TyJLv}w$>8-DR|`?Z%uf7HAOr$^Xjo;b-9D4 z*o9K*iV3Z%KeydT%|F<$A0FgnWHQ-pw<0iT00DE3uC6Xz16s?L*qXD;T@gRHmQzZ>?B9#Hx{9WQl2{qE`Ln=odf56*t7Y9P>f zIs4m=Uq?s=nxvVK-_?p{Vg7buqy&-s!4WyW)p$0D`@9)H<4l%UkNYU@HYU1D7I!v( z+;%FOFK=bLn7N^|Qs9r((N5bOTiDb2by3BPDJpFgLkmnBD-$+u?w*x=wxiWsh;2mH2+k>z_Tv1F_i=BY~A zRRBo!FQ@_Yd-_MnO#lP86-#Izf4;zbwBJ2XWqJ@?4>sDLfEGgGDtCIi*i4_%92>+w!Rf#3nHqG3-pv2l<GP!P2;~UV4-1^0F$~MwWZGhozs6?868D z{L&lKYrO>Zun>W4Z|D6?gq9mS0a-2C%JwHJPm!93X-zvG1XF#|KS-;0Bee%Odu$Um zMS6X8rlN%$c zbtRB*YRuteGgxjPTBJ1C_dxUoKA$&xWb*SYcA{(&*B^Zpx6v>ZhD&DZgqOOO_=23b^OZ;!p!|J zpYGeS#6u~|HoM}sT=VP6lN=wnZX1PSW?8dy{1AHWTPE)j&$L|S^R?l+ zGJSy+@@HJJHX%Y66jL=-Tk*IJF1}j-O>+}qp0VqN*@6Vk$n#1Y2>pwqbIx_eHz7{i zLn5b(4ff1-@o#7}Em5ahO{STIF(D57`JW0oeosC=moPl99kWJ0aM<5rLjJVv^$yB5 zEBB#m8BA9J(zoQFKI^6$6o|lYx_k95mvSap;AW2au%>yvkxNs}5My-GP}xJ<001iP zGj|=+_0-$Rt2UdJ1G8o`ZyQG5JVxJV&9DHX`oe6>WqB6C{kjIWq6000Kh7IsUXHrk z$8h;-w!Dm4DUNO%qu!4lHD`s9>Ml?2mJGs(;5Si$XueyICxh}IbQDpR_74uPRj1si zW(NjOckf%yHlX8^wGAojw%fC!UvtlHHi=7iAc*InRMwsw z+_OxAPrqN8Pe$+QY1Xrqj1ZP^rLmy9ANmk7Gr^>-X}7@?RK)_CRaPIJ^(~KY7C1%W?Msr<9?myh(qS(FTLBzUs~mD$%$q( zrM#J*#%|(b`(<5t*AHs#YR-xLU#kpZ+kdThi{W(gI%kch(rWnU*{U%qIT=rO7Q{;= z4j)@SA$Smb=AL45tIs<-8@+7&ayHI8LW5gXn@(Vq_RkDC;AaF(?uU`mj$fLtP{%S0 z98)?kovzlPv1m{C_Wb$JANbf;oBkeU4pE_mCSp+_Jejwww2fWl_>gq8n11D?e$bRB z0Ef+sTeRB?z{NZ0zETWuGn+7#F9KUwgDkRYd)J&>IyhDq{0qSVB`~G`lOzRtqsuUc zDXp$*yH;nWogQY4pV?JZcN@)q0DmkIf52l06A&;V%x?SZy;rAIpi>t+GV++wyl^MNo&>&GW@#P&A{2Gf=27p%RIXc=r0b#@NwMVV{A;!w5^IIf> zEVVz(V7+M!?%~eZtK=Vxk@0fzDGu9`YzQ^)@UgYX!Vo~6U42p~RbpU{qu<0U zFPy4r3nHw=>i9O4z|BdYvQPWp6v+_)up=I)qb%;l^gG7i8drb)is+2pczp|cyO#u` zmeeLWE1)*|_*>)i(L}r=Jrp35(_AH6ZlM1nCI))(yUk5&1UG!4XhpzM-gR>wa-Z~w z%1{NT9RXAA zevJ1DEQ#lSc6ZR-(`JtelFq}boS%=ne{d~XYkQZ<4dVa9XwUo8`Kw9c6xqlpCt2hx3c;hhy$yeFF%D?>Bu#N8ylfOIbS_=LGJn+7@sCRvuUz2uVA)$47Sid?kowdBrL{PN( zMr0pv^&1Wy5Mmj*n4v6p3zrsCiA@O$(Yf$?(CzU|-<;vKf&(cZ-a>5r03g{P$iC(6+U5Y{YFvWRsa?1$ z@eFO|{Veqw8o=wmFch1-8(n`Tz;H(6j}M);4KYurO1(V_Tru@v0swS*%;pS8V7`R? 
za=#=|?#YG%0CdBT_HQKZN;R^;^SROsfM0864TqWhdZXPfOLpNyK*zZ6J=x$ZSO zyGvl^NiB#d1M@1^poa%>=o0?nV3;$$pCss3t0{xO&@oq2#T6-v^zXv(kw600>!sB!ftOM>2~_|kf&>h-RsNYxw9I6a@ocf0XS|=i zH@s7#Iug_xNE0d-4p&y;0jabm2^i;UmK^1dc_y^*Avf5@zhq2)IxwLZNT33~ZMQ48 z9?}nZ@OybTo^XaSJ8RP#EYX<(+?+-_> zeA*f&KFv(lRbSRE7}E{0R7Y+*`)7Qxen5xGh|-lyH2nO^Ucb!|@^108bjcJ_e?tr% zp+E`=K);j;k-#U{%PQy;ebcQ9bU@l_uhzok>Hc{danD`7jd}=$@My77Ao3gigwtXq z{2wMOZbbT!EDCG@xBvhRDF-&?i@Xn1I@~qAG}0d`NwbHXug_EL_FrS|lLCHGIhm?5 z7Bx&GG_}v6q4JREfX2&_kCL@`@{~zH5Pd=pfDKLApsdns$!5m+bRoRELyX|U&nj_a=2Um(T zG4;>-X@Nize}uQTptn*K*}OIr^W$~&2&aLulH)dM5{`xjBbdIUdjI%1HF2utWp^i> z<1#tKLnPa0y_5s^I+PDb-K8WI3_wIDA@eVR=+G2{X!!&GCF;f8iX$yirBHGIy90ra zXQEEkSO=;h4crm}HAShn!uWMptTZT%X#SD$=!ME5cpjh}bNPHjLWGB?rLI`^lL=*i z>@bC{$AH<&ji$L)$zbPiudqLLk1y|svZ^np91IOvaSph4B$k;LgA=%>dk?oSy$&ML zr`yLOT1KscvIs?P)6Dv}mY8n!o|L<+tRPHw3=LenN#j#uqEk(W; z&-lLALIL+Ey=TI2u!y#?hao|0OP9CDP|Y`PxSlOh;6#x&+g@b~x>maxZVyiBcmf~S zXW`<_RYI~TfXgqM%N6eCo=5ZZzH6)VTc7oql`ZRl)o4#EXQA(!q=Xv}xgr`H?hM-) zi&8Cb$7#xhg+M(Lg?!JoC0cK>^^9*!G%hSpTE8%P zb+Lh=$Kw9P+2zNCYuY$xh5%`?TG#OUuwC!*hW)lb3vGePVk>$4!at0F3>uv0@$!Jy zi49U@hJUttFU>xV0xdOL5D_Cq>dSkp>_0Z;`o_pewoNJuCL)E?BPzo)MfCHRiJSPQ z4Dm-aeeCcn9;j)(9fOCZUKJTvul-hk)4RYC+pHzInt#Sdn};bm4apxHpRED8QCc$E z%#&^YI^Ru3XYEsxW$-^oxBQGtFV*)E8Q~^9C zge$_|{zah~eqjOLI_|d`oQ!f3UWgN1XD)M1SBpYlL#o)`2kyIZCQsX(tdAp{2ijqo zqQJk*9r>SjJ+|?Ui7>B4~DM%4ie4G{%)muKKoEbRZhyJJbEtpSKx%9Qx7fD@frj zYh^UtriDZe%@Uh!&AR(oHtB-P%%cj`>h7jE{(da$%WeImTrOsKakG5FF0Dp8JZ2b_WCdRUz7`Y=4`j3p((%GoW4R#JiGcb2Tk5ZHy4);flt<9S zU$hy$F_&xzLEx}4EVV2$j71rb?4cV=hPd|MYjK@x`ot z(`rv7q_My#-yRLcY65BAL;kb0XJgqQBP_i4NagyD`SVa|ca8Me%@+Q`r!URu|6dF6 zB&0i{_lYecum15>Qby+P;UOE8wVO}Ut}_@Kj>bhpL;D%qGK*Wbfb#)|j=ZJEV;B0K z{N+2I0$1b_*TdzCf{%IB;}Ai3M|;B&?aQTk>86V-^@|&b@2hd%pU&czH69xn80hJC z`}}w(Kcj85SUNe4yuQBPuJ%O5!71l)yV)D-DOo_r#(ugV!eY>UTjz3R-Lg12=(uo< ztr`1%b9MFfVxXinzn!Lz1xi({C@U-XC`QT2$+60_Ff)H-C$6opG@w#)bH8pYyjNCM zs^~IMQs&|_;H)(ivKBUkJ@A|9VcM$g&`Re9AdFJ7d5iLpP@U`USuSr%p?P*jCV7 z&pD!bewDPd3tz~UUvo0j=9V%-LW(7MClGMiD3OZgbL0-;rugNI$&O9}f+4feK!bjQ z8%lcJ--8RCKf+^neLU}td}*S8`37l=$oXrW?ejx_ri2U$tXFe{o{i++cIHj=?I%b3 zEqYns2(AJ=E>HDmqj5sIsy~fd_2Mfg9iV`YfreIavs}bqbM~iWWq6!qb_LF`bgM3* zH#}&?*6|i@BpPqE%W{$(H9v&EM%SU%_3q|I742ZZ0_*v?O^3_1!dd57x!Wu0>SRhf zfQXWeRO6mOlo%QZ9T^>UL#RldTv^9>dSc=`H246crRAw_IckKIBWg{}jF){Machn3 z9X$RQ>2}E%?s^@m%yzvi>Mjy8c?h6-S(~3i_meGI8?m@8uJJBPahKxH>KegXhnD;P z%~FLW`%*Xe^$`&7HlS!swO$c7-_s8XvlCPbLJg+u;t;o?4EzewSSoU#47A{@EAa z_+_Smf4RMt;XU^9@}6t<;7z5)-H;Fwc{hJU>in_Z1Jm{3zMul)+C9<|Dsq#{qqV{| zgIwl$E~~^uK6}cDIh6Jipga5$KuO4WRVj?p1UomQz%XP-zv)})xVBhrs;AVwt#QSZmU)re5o7#N1bQDuCaO_nJy z3*_O(kGFI+9L5`_)&y~aBogJ$|#=N@fjD}n$2MTbK(@*M01Z`-zn_fL*mQqG4h}g5> zXnV@CE)**lMo2p>syP$V?cv2*`13~ulSF*3L;xvFckvv0H6UZ(0t`VAgR zbm`*{s_#0R?n?FZ>*E(c{aAv+Hg_Gx1oWZZ;~q&bG*na`$K@HJ-*H5ecob4nQgn1x zQWN9w2nZ`xy3gd-(Z7E)Nh(tB{|HBB9TNe<&(NLU51dIc?49h&o!QCAG^i6?&XDUo zZ9VA5{*>r;%g}%Id+XOOj3|%KryTE}lq!GNEU`#)cXuBi9v&y0Fsjeb&mSL`+NW#N zwlOgoHfcy=URYRYYEH4Z5hQD~wGAG@LpB}Zb_>$odbA8Y`BSP`aMIpnIzvFmS}5T& zUqkZY{Cy-mWJ>JgJ-M>Bb~bQ$c8wSSAwruMiroXZ7Ya?U%LnndM!hI|Hx z*T~*7OY?RXE|ASBndlF;T7_GC@+L+o8Q)^NZH|KawzG74%5J#1xlul{m(45R`pLL8 z_{VJUN}q?IMi7Tu>&w0#gy0G9Qng|^nYeqMb|C-=(>Iu;4i73_HV7z^s-;LHL&WT& zi{f*7l(rU{^|yvYL3P6HlG|*z-S_r+3qv6(n=?@P{W;Va57azaLVvB|nE++-mmAZa zi;=(LjU@5jS%3_!c$-Wg##0!#_hS=x1KrtCc}kC&?XVpraDV;I0!U1!lpOq|Ea>QQ z8am8VM6Y8r`g3FV+05%Ll;0E?MpUk(sQO?&MfTDSJ+ZBn*f#0D7c$9mZCt|XK`I%i zm(e(UYc6vgnOPYXnUIt`esi0&Ux|h(CP{kVYGWX2Y%JO)fQ*fP^|!A8Q=d46K?hB` z&>%c&EM7%bHEa;I;H7;+mPDkfjRaayR71sR@g7(08}`8TMFYN+v~(4+s+dazEdzt# z$p<=~Fi40p0;sTXV)+{V6baZrJ 
zegpsBvZS{ta}^>~RQCMp6szRvSEQRnW#r}cy1h69`_#KUn*99*zc&)YZOI%uMDuv` zczO(L!l9-ynq8lvGc`%47F+Gl50mE&7N=;byq5?sT#jSk${5$~rT_bWrE z-?>Dm9*ozlx9BaGGigx&U2dZ}9xjue?4Pa-6_F{E%l@kWWB+&|i_M;VZ;lU&n@zP^ ztlTeliZn|* zWA1m3#zEsdP4eEKg{r%1XWRE%!AUp+B;vb)bq9W`75xR;~Nk7 z5qVgzDX`m=hNtIK{+7@tt~5G5+KB0h-5s3Da0Nt3NVtMRAixEJYn4b*X$%z$BzJ=s zowG_xa_*THmWSD>>u;A%)sP|T>g%EWCS$)D?~|cqLS z9+pdo$n*;RRtCE*xPYBqZFY7xIjTx%-e~_|POAsc2xd~L@sx|@(CZ}i=rcPlIuH9_6p z))SC|&;x(vGSl%|Q9Px^)z}$(_2^T!eL?y^5Nf^NsHmD(hDPD$;f4%hf3{Z^5dlk| zQqdN|iW{VP9$0Uvb6V|UtRh``le<<@>0ZZFxA;m69ZV8W8Z3k&h%7lKDJUq&?Rq{r zIVqw@%EIEfa-jptIm_qpo#Pbr^WNJtZZI>c7}K&2`OwVNj~m3|s>zF?0DK2g0Gu8LZ0a7$|@h zH}~Wpb#JWLF{7|S1E`R94r7B&-GbCHqaP9y=D}}Q(UV^uCy+MeuUwe%P|fvf;@ZvT z@{KpLw8X^3V1s$M{-+3x@Cjo^MPy`ITw+&n4*L$z38ph#&PRn?4TFql!}qw*0YrY_ z-~xh&^jTm6{=3|+wpFfO@>5nn*x1PVt5D0k$uJ|6e4U-0dzyZ_dAmQqc6dT-A7Ia7 z0914|4fRd(-OU`&YX*sZ4%Y8ozB8gY5vb4*FfcH{Y@TrFaB1SB2WBXO95GG81^k3G zCn5kjmeGwTFMdx)B8%6(!eYCvK%q_ss*0pOWs{7T@-#PJ9h}I@rr1xbk4}i+YKCsQ zxihn|H4W8=4;Jb(V#bGumi&qodQhF6&8Ie<{loKhxl1FfpiimnPVntJ-pGhtW+h%| ztNUq9vvs+uN@{8<;5Yv#1@7cWGWF_fK#S{6c)ImTN(t0!cjtOXOETRIC@ipy_zy~$ z_%U4i(1?(rh$@o&h6YH!lU#21oSvTQDWt{{m1YyH^z{*QQc-c0I~oqM)zwuJI4Q~b zFY-g(k2T!=DamIhi>AceL3Y151zcS8WVn{^Ww~z>>QqqikTq9&e@kVt z#uSY!to~8|LPYd~JMnrjC~teaUT3K_?rwMRQM?P46A1@JT1TV2F_BafTr??@FRR5Q zy1J(3It+zWQYtb6I>O-fg?inIHQ2z)YUlXh@&3Px;z1r>p6F=VMKdL73H@-ho8bhe ze>MfLA>qame@Zmq9?8YpmcAEOpK@`v6I|#b?d}d}1|?x)fxH5b$6IN3w{C1TZ)D^R znbL9KlN+I}9zMPe$Wow*NzI@N*H!Cu7*K}fQwHRsi~c4B16qZcT<#8IgsSfOmYD7F zM(nj$S4EITKm}sd-RWL$PuHhu4Vq~F&a&Sum?K_4wz9#L*;yE&BzH*jh+u-dL|=Pn z+0gUiq>I#IYOXKJk7>oUwKE;+;6Stl2;s4#k&m9l>_g1;W zd3i9ZciisMdfn8ucx8_a4FT@&H?q6>=g(r3moRxkT54*i?`Jot%s^HhcI?>w{EW_y z>ff&+CT8ZvrKKXu`p!<7-#L|!PSO;gY?n`a`*%N*>DBs&&{ zNn6FR}kWdg{0{xMu&;fRB8>3-IpkI4Xrxe4w zCks9xCdT#5`E7};S+H3ljnY`tf}rAxw!$(mJ^0I&`7{Wl!G>BYUw{c5PW@Z@iZ1#M z8ynt+&BFJ_C5eA#db)brj8)H<3>uV~P(SeY@^v^AYO8!urR{5u`<)%ZguONrkN_&J zBh3>4qr{&LXKH$CI1==|B?1CSARj06*25LIaODi=^NW4I0{O0&H$IQ@RyqWRU}4x>%}+ z!}lF0>T?1+Ogj}toU*W0ND6e(&gc2d={)OI8$>$x6^X#F9I2TdJx1d&==8hy`i!Ud zr;JZ=gl#Izs@17UOm?4t&CVqZ>;<-Z^U8%iB@GP->{OJNQqt3Zc(8p1KyZ(amKGKc zuBSOMnif)tIEiKtJ8;UVPMsDrfCPY^mPaAL-r4385|x;yS4dhZ%_yFeLnh zD@i5e=ducthEYc_&=t@n!O-l2cb2Qx6-GRA+7D=xO3+0?G2~KXiiwE{sif*_Zqw7h zP{N1H?au)uelx`lR(nE+CpmgyA`3m@Qjl2*h=ASaM;Gne^+H_kYB5=$1K|QX28P3H zHyfP0|5_TU%XhQmg6mXhOjo$uru)Q6Soub*dz0h2%b=)FcWPt|d0QA?@g$$2zu=f$CM2qqQ7J|fiH zV&xTtMnJ;84RCu0IaUmoVWp_0*u|5+dv$*2cCL&WS{Qf_)fbkHing+{3PJidPhB~5 z9a^C?0IOK-_LyMKADT1Uo^7Ut#jk~qqJG4VV5XLqA5tZmM$#M$4nT%Q1@PKa(;-i3uw0Ik z;ieBpa{lq(EVF2&`4lP8J4P%iLoIM&vSx8h+R%gjnk`S-4}yV?{JEK!Kq&=KQqV5B zB|`@!CZ$9U#q;sz%V(`6D1LwgPS`vw)q!B^@PNzts?WoYv-sE7`kR~fkj`#QW=FCf z8OjNUB=roY3DFw^|I8^76o6G|QMP<&#B?%)VtNb=5ccM6Ad56Gw2dPjGh}u)o%B#t z1Ov3Sv_g$1qE75$6OUI`c9)!6v{tGWwmoucQlu#@)5oze7Wdq{kiy7w@dBGah?(1D z=u^gw>dU{hY11eCgXp$ABtY2TXmk7utTmbkzI~IXw5zmL1Bj|Oq4JZ5iJ?!h z-?UB&R%30g*qGR5KasU2)a#w9Xmv)4H`@08fO%f;HCL#{3QI{vfZ$MW41(|N?d>;s zF(04qf`S4M>n56D0rrVM+!_ZV%T#6gl^aUlqEz-)h2Eji_`D*ty|-xmMn>TbqdG!b z>J_$G9`{GOr&2lDyyZ}e>8m-Oj~Rn+=YLZ58pgLaeeJ~IcrD$dPvn~@A@>7wy^>Eg z|0;W{O*P~?FQ{2|HXexbRucped1!ZePzwYNG&{nKe~}%u;DI_&$f`~sX*#40MZ{mr z<&^Fv_V}43tl_O-O`Fu$I4KJ8?T1G;eM`B0qBOOr@pJCM61equCHkwmM}uaI&4`K# z;|2~HavDu~1!PX|Vxz ze7`Xj)#Y|lczm9#KjF#%QOZQtnGO@LXj@@iAvE#c(Q)lr6wjUmCtzfJNCM65bS04F z(2cos`anz9F(|-4l4whsj3T2HpE*53Q_lihe~PN4#w=Z>t%FS{+ zb=FpUg#|7ejmcokX_srS@h#5C-2Av=wPFAEAkFFgr76zEdVMDSHo-yk>2S&uEyWXN4p+{=QZq7WcqhmiE*R3)a;4`?=;3G%p!>AXh{GXDCt_-rZBB2jJwV-ds+!7$_1 z#7A`+-z2x0jfc6RV#(4x)O~FfcJK_@38hm5y-# 
zd9pnu!RU`K>u0Uy9Q|q=W@VNVM;Zj@`P9*rYX6&gQSbWgu{~F23Zpf-{a$}r(sUOy z&?{L%0Nv#< zeLRhQXOOQsN#aD4^R+9B`g2KmYsK}s-W(|UPU_`8}7$eu| zBRSE=_jCUSLM=6?xID>nR0otGHd1{-h9qTt*zD|<4@_F%y;X?IdkW}9gs3|^mwI0NQo}d<#SFSfudizN zn0NOuekTZ9C-Ji#o{t1fOz)xa2@-|-sNHqdNr81O3aW^JgOHR`p$G}2-}l!?14>QD zzC;h=9@&c(%NJrqv|DjIs1P36=^o_An4dR;bF1AY)P}_&@n{qQUlFKVT^auEO9z2y zv;24^CCw%?1n77P)ZwBcwHoa_%JI3SrGX)``#s5|VP4@ixy# zdH@0f;>wNZXdZ& zWwphqDnK#-10ALT(T;DFT%Yd4w|sO_E>@dnCMI||<#Wvn*BT`xnyxg|RN9YrKxmnz zr8yx6*PH%y-P5c52;^n`ejB^Qfc_R4E7l67c*ZGLmGL~z;>}{z{@+H&Cd{YBO!#H4 zWUf%*i($yGp+XvgTD7Yk{N)!SmPCXsEXv0XF|vz_ z-@|aSjTd*QL+f91&3UAOdg7Wvw)qD12oWH6pxVQ1-vMNrOC*#i(e-;F0VP2%xS4vK zg;ea$dgjl&V++IZZ@39SL_ISCS!-)+s)Mz0D>ibWwvySH<2b!MHQHfEYwJckTC7Dg zf_M{h_BYG*9aT?xW4bMOTrfbfoA&e}K(ClVtJC0kzUcOFW`!p$phVso9)6zB%Z}5I z$o=Y9o3>m$sY%|(Cae+jx0u}v@z`RYPw!i~ENTNo*0DX0l~=PJ4Qh1QeRD;{-}4QP zrSlO1^3tXm&8-!#zc{^~YV&@N-G#}G=yg3bG<&2b$ttQyYk_!kPGGiwen;QIp{ouq zxIcwK4_sg(nJ&p6C0>PVG)P9S?7i(kQW$xBZTU}LS_9%2eG)Ok1=x$tam-ydo`z)2 z!&neiXK&AxGSO69E1-CXYYnEDqOONEF@#od=J3GrEcZ9wJwG5Iz}390Y^Va_dp*QV zCFfAshSO0mpETA-yTcTht81*s_j;BqsR`e88l!+RA08g6^yX+*E4-V#Xz#pi(oxhZ}xi8KzMo#k^a$#@4lJ8Tw@ z$!2^lUfxbSyp@IBe9CgQ9MiO?x}DK1Z2)_Idbs0Tjc%Q+Qklh!1|l9uZMODDBhJHq z;$%Cy=SfyWr8ot-G<6bFYNdxTH99kSLV*sg6}vjrSL zlk=9>hR7NW!Tt5?3_^7}0XFBG!P|&&q*|+I>t?ri2fbk#Dla>`zre4hN{#Va{fFvm zNHVfvMa6L~@+|H{EKDwUkK$--{qsE)UES&XyG0R`u2b(t`K84h+NsIO@=d8H2a4L_ zBCd$H%F18f)vST`lglrtpaVUfZD{@_X&J6Nw~7#wzkcD#Y`Sc*m`+!^w}YZxR?B6g zy2rTNmD17X-X+m#y)Qtysnf2r>ge@ZXa0CTy5QVx#17W*@_7|cF^)zvn8@oR?f;Hi zAb=5(kqv>u$Yj%JcH|=qi(Y0n=nRpcmBcQ`MI<|~eDV}NS9L3$f`xp2@ zxhI+k5v23+dTHux3-tN!8K%`)*Pf}49AxOk(8ETIm~^_NNm8V!;e4TA{Qh!OPwdt@ zYcQup`7`wlv0;vf3c6eIn#Gaw=NKA(0bX=2~uR|SVuv3(2W#(v0+}aFMVurKL1;= z5mMx_Bk*3f%>A^OsJs+jR`jC|L!OMMyF8 zpf&XhL7u+f^d7{ur+-|mx7a3$m&j$6o3ox8OoI#$`~P1HkW}`_Y_meczEL+&o+_(3 zZK0CP*H$M^Nm<5DIx|bl;Sf~aUFU+*<^E!G{q)oSUFUS|-`R7x5g@=+xl&vG-2~Bk z?eKZ4%hS1QSf!%B$$DK?){npWvX$_&w7SDuGej1Lzj{kOT_j8jRCDuM1>#?&$QPXL z7;{*}`APR~)pu?WgHM+w7Zy<1)>}>GC(IXe#X{6IJ>p4I)0Z*bzdv|bogpNhdNabG zU@2>mDJu6Aqs#WPeUFX!sv-f`XaCG4D2BZ4~qOaD_9`#zj1q!`MVKDxZD zet+Mti8gw5^yO!P0}X?>UawL1h(@2+gOsPsmCkY)cW>*4a3m;?pt1Eq)smLAyam})&ztKM_^+a?_*=^VQ#_XC#UrhWm(No4zd|v^ z!Dq1z+4=vnBHm{O5QhF&S;i7--f1}WjjO!xD+~sGO-04?(@7^b?>`=gPkqeEdhL_b z#d3c z?;O6z7j!r)8|$kqde7A=p9_+TPG6}Klfe^dsbcwkH-decxiQZgai~1U_b{Gz4y`de zV&j*C3c>y^t|)*a;CvV1g_J?m{DQ)iKoU?mz#)eaO6MhCr;fw*&O~U=Ih^6b$iqkhCF#Cn$h9m+I(^+`W$(w-%4`2<1LMC*__64|Pp+|%9R9af z$)s*WztWt(5+RP$kVU0j+y%2M=WgL{oBP-x??Fi7aXAr^oyKM{%F*MK^btJ7_S9)Q znyk zJdVua-I@sWZ^Vffl;z|gsi@g(CYx-cUMnxS%ws#HUXqQ`m}_7DN(%y{=;_tLsgv zF`>p=tk}JQc}R7u{li@rEEp1KdO55F?&|FCb#?Ue`PSyv%bVksoyoyM^!yIbV+_t_)$!8AiJwg- zKjh`@4f^v;O^@8Kd1YpwB?==-J1gVMK`R*e`s!6aqqExK7W=w6IXTPkdVEn#Ax}d? 
zgT?_5;NrrC&sHs4bN5{ir>aiD>$v6P)J~YJPnmE8?4C6}xNwG&6_ID|41YhHiYz{( zn+|2B2#3S_9-E<9cOKrx_Biy-xBfUhF;Ou~#LC*3+v`^cc_OptyGpAQan_;t3S8*y z$^FZ_mbwc5mjKzVU?4TCiWXi}k>5^@o$Y(Pa@iMvV8U6bg&IL$A)-P%VVFlEuDwyI zyn=^jV?m3CzOHU?i(OeOyN4w<|gD17dVGSP#pKW*!nakkw_&8`xxD zwD0iromJ*{cPt5difkXyu0r*556o7~7P^;t@iPAkg7(wZ@8O4J66nrao}N~-+psXP z2q^wqUqAaSjy@wxAni06mQpr85a7QKNw?SQmBZzClW{O8C=hbE8EXuW&kz4bwe2rq zztvQlxo)ppR#rNz-7vQ?#LCswn+F2>jEbO=vK|A$6N>Nt zC65~zjAhFN=D{n|rm;!NQ9~^Zt_${-S`w^@3MT9n;i}i^bUGbPlqKA6Or-XIcpv(} z3=R&85G%^^H>H_J5<+E9`~Cf+C`^_NQOTm%Y(AsB(TQ5!D7-9$z36F$OmwCsm3Yi1 z1`+usphM}=|036uM`ep(bT-%bqWi)+jWY7WG?vnoPUWuULo5IBsIq*a@A~borJPu_ zJETxW33WniJWC>NfYJH-MIU_;Ef;H)Yw>Qrz* zLK4cbBP>9mEXm(c$Unb1htHSm&pN8s)&ZQ{1FtmTCP~77#dl9r^hl_fEsuYl=tLQoP*!?F|s|?wFX>(b}%pX>d)A{342cA%3t%j@5|BR5UqL)wbEZ~Nx z)!ve9nA0?Ck{&SraV6mhlhfS_vR?T|XW=~Knu;v(lW?q@X6jDNsiKe*7xVow; zQ&klNs8@pCN)>-=<|wZ^5z9sKwu(zg(1|>I+OT;I7qwVv%mj`ZcBliguApG<-P+F1 z&fBQdYjJxy`_ZkU(pD^Ww7dmlx%=t5ti0Uq4>7Ci!|j32diT0JxaH#1oHqMTzrFW- zY*kcNbhP#ex-_vCMU6GxaYvK@hLt>~0_rd$EeQ$85K!Zt2}|yy=}5FHDYx(QIYK zVf!7AXu^WJBJhLS^AQ6^ty6{e{@RFRGYkou@s(`JH1#C@|N-Ef`KsTGeZtNb!~NRST$jsQ|nsj)n#F5$WF=8A`iI&>xakFi237sP(e!! z5{Av3ApJ9K)~wM=R9Hw)Z?~M!uR*t_^8Bhg&o>GtjZfI(rVl+aHfHe(j4vrGgU91) znG8PUf26Ih%CLp^bE8gBTMAz@B;)}V%NLqc4V3>9w4R)#5m2p#6|n9vSZjK!td)h=sVCV@8aGF60g5@?y6O_oO;n%TlO%d;dXe<}0{ zL?8hbukWTWvjY^O)sWF&1lU{}QUw5n)bL96UTLG?1(p4BcsxM0F!i(MQTcpkyHDfZ zr{l^K44#q=&wYfw3iIYlmF^N@9n=c%uwDOs+bQ~A&C?wDT;6JC z0TA77T)e!sH9dFd?RQmHYQDfE5#lwitbGnW{?GB@!#4y1>dDD{%vUZdQU8ojLfhK1 zY=&<-ST)7EAUjS$zKKur{d`;<(+O~YGN*%zgQS;V{6+1u?-R&_enW5o9nFS8?@YPY;2TKNugxS1X~ME znX-iUsv(z%nB9fbHerStU0t9Azpk`2wfp%M^3tXGc}oxO(&Qf^IFS6AQ&M_OG*1Fr zuJR|-nscE79!t+NGn(7I&KN_Cs0XMRe#xc3p$%=6llP>enDqNqMa9H0FhYL3q(X@( z9kri53hk1$yG!^ILPJBZt*uS-w_j=gJ~%m9>BS?&9f}Y}*YSSl&pc)hZ3O_JTOE0! zx!sy~unL(K<;5eAt*&L2c~`Hjst6XUs1%4Jml7nukHyD&M}@SNR)c=>lRE;3m;apI zI$iQMx^nyqY`*Ssi-?Yj`o6ol zXTsfaq)B`K<d+uLA;ZSnmX4Zu!O-XVJd#MfkQxaloWNvsmjuBXTJ3qY17uA zfvstza5P;fhV)f@#ZkfjLKJAR>1=SVI_r2YZm2FB9S@E{cPSe#C} z+9eG<-8+Npw>J#ck?C2E-XL!aS*bN2Sad)^aq;5Ae6bwp5v~G>F3tH+fjE5rDKdCx z^Vw@5$X$jv)(v`EaE`bG4^hhpfmqxnJXh$FGa5+sKc>ZAF+sBL+g>)D1ylhD% z4a(YlmMI!QTvJ2VP}C!{uZ4e!1O#QZYoZ#0)RC%>67{;sw9^09@+9XPIFZgo;N2{opxB3 zxk+&%{OH|UX+ zQdCxL!nM3>ZTYKgw|xPe?!rKKV`Y5)+c@-KuiX}zmuII2%F?I`%DDjGLYf{OX%g@q z9bw2hI6u1J2@@jH>wQ1I@}XB3hfnj&$tWx+qAxDdRaDj1&{9lKAFrz>w)`_$2`~8L z-PUST9=-HT%O(@JZTjiW-E(Q~r0DJc%=j*7re>C%uA;h(iEtFC{9v zZ21>+JJ{>y#?QMshI)cpF3z~SGX;qXe26znUTzLO`N(I|+MbDljqQ=57mh2d`B0Iz z6H-Z0@$){=fnc)MUj*ZrHvR2l1T3z|LQ?X!IHB|oTB>#GM5t(;@-4fj20S@(_Tiyi z`8gs4pNG8K4>Gc*h88}2@%`St(dHF|sFogAjV~8lzFnpQs|ij@G%llT+uFZ+uKt{@ zt_dr{P>tfS>3>cq4g{3t^(p;*l7gv)M*jY#=$n6j&zu%565D@G{8K9%CUCr(>S7(o#rRh>H|0h#2d~yf=7Mk!;|SJlF4~rA=tK%M^{U%^uE? 
zweO?Ezy(7Xjy!Quj;wlGn*zaxK*_jAZoEKzBPG1;)(n{kI>T!~2nii~} zl9CdT0+E%K{aAT__wjrQo3_5YyF*1qJ zAVh>zQW_2swZzoa%mSgQJesl~XT2t7l(hy1OR@LG)#ev1+C{~LethWE1k()CJFq&ovdC#862Lsv-`E!9eB2P8k^S0#B~lC zrmJS>-jFJ6?X%_pj?toueqUc%qBGqS3xD&Cvzd_9&HU-1Jj0>DVbwYa$Bj*&AjyB|q= zJQodf;Qof!>2yJqZN=AUxBG2%J@`8)00Ka<2ygkjLB^P2qtNZ1BMvXvG#?y!{{jwE zH&wYi#MyZc-K_D%sV?WV%s0Vj{{fw#qQ%o`aq;!{&97X`|Gs6PFU{p?PkF(yNvOei zWP22}QmAM}F)mLjn(nJm;d3AM@5M!w_qK59_r7;}_-hAN+C+&Qwq^JACAMu73+JEB zyh9YSIgFM@(X!Q!xI0|~{mOOH9`|DE(y1cEi4vgSrHe0(AVNv@>xaK>486&HTYbUu z{*uPD;Nt>b~qn=d>Lc~`;LY`xGEHZH{veCXY>Ha6$;Gm$aYVs18cluG6cNtK6 zaqL@PU>Daql|8p#vTsD)mA&n;`~rc^q0#X~YPDLO4JU61A!@jPHGy;*`3{D|HYM*= z?aB}o9QhtUn_ft-aeGE>%1Kd&{d;D1lIBA#tOz$HxDYd3f>c6E z%7`N^JA30zE|`P>5vpV;%>~1(hHDu;+-R-*Eq(gsW-qv^%IfXACTR%dqy$b$X}Ylp zadPcDr?0(gD@W#aFBFx4fP$ig3O~E0Kic5s1x2vokW`=F^z?XTRh5{7O-3Gm*caT< zOw}p#+5E|g3CC65&mzTav%y3JD2N4JSId%>u?5vqz~{Iae|oLWlQchk1u>L2EY#O&HT>Wx#U+X6 zzVh>!a0Y<)u899c3!O<WX_zyT51_x8P;1>)c}htp~3*7e?-kI4czQR33MD=F{J?9=km{cXEL>O>VOzTf;F zMHPwGSN7XIHMydss;JMLW{Z#)7n{K1;2IYz`@>~hM_-NrLdRqRAe7+tVC1V#Z&g*5 z6X)*4<{Geg-U1Nd3Ctc&b}|0r;*NqGRKTG(EHqi&*klDCwNDx5tbjXwb9mU!xD48A zvje7qCh04%ZQQWYD~%cz0U4NRU;v=o`Ped;e}7iha6CXDSv55{avsHUtyQIl_^qr2 zcl3{3C_xme31o^N6)+>6->>mYm=QQJT(q%clT-O%N^)vzb7LpLqk@LUGbrG*`0j%g zvhs)&<|Ox(FHjOTP{0oj_FFN|`KsG5Lt#-$v={Hpl@dvKZ~zKgh-?y)57K_O05mEh$H4rU!q2%v`F!4k zg9B-w4=xG{bxFy@QU1-7@^6b6;DPUwuCx};m~M7slW<<73dx>cNkb{TLj?6p13zMv zpAU1jRAjQ`bT>yF56biByjk?WUeRpWTD~M#VmjWAkCxy90Jfy16*tyqBcoI41ZNE6 z)*v>Yj2+I$KN}}Z06?VB-3C)&I%9HDGXK$GWMt$Y_=P=Ps(*rMw%Qo1eYwHq!BGxd zx{mjyNl08vP&1HiYFb(yAo$bax^V)O_;Ub2gO-W0lbp<#AHLgJx;Oqe|0gYKfMr}ue(G8lS);!e#36X))8 zcT~7`6QTkIaksJ5s56|DOMrB%rr{b11rEg&XE)+KH01Q|lge@WM8XH>vP3v&@~5ul zeH_sNmVu$J!#SIXhR)KYz?Q#3%~^;L$Ly6y+ep$Z zUIi&0?o9$39e)&Onm=vKg8?8gxqa*Z-&_DqiZ=5pho$YGLgUbK-g2{eP{U320f-6x zZJxay*<(~SrcBkAgDg9#d4PbH6V2~WNq=2ftTbBV^T}7`+f!82kE!g3dB$gE#D4hX zBqwK8NiANp3kgL)n7nt~;zhE1y*=t?(1R96zNl0~$tLc28Q1oluc>M@P$K@zoS`ow zu8L3Xlq}7XsuCR)U9wsd0B~4}LCz!5#)v;sQ-zMWi-L|?L2G)4tg&}8%Pu5r0tt(V zis;Zlu>98>gySeVC54oDgcITF8bB|Cw8gvOPf@OZUQ0(!BJZncrLUyFM4|c8$E+t% zPyBqbpN&H170{#6fb^{eH_>@_#xYgb2>Ixlohn9+F&X7ZaEGdE71@23K^y*)i%mxfweS|E&@7ybFRQn}V3N{H+5 z-BV0!{o>QiH9n2cg0-bS9QYoCV+zU`*Lu!b5g;S=z8bjoWpMp_q>2X*3(zJzmO@ zDut%P@}%2l5kvhD&zAY&6TEfd4MBwj4F*Z-IE|FoY}Wk;8u&7#d2xbsVYYP~aksKk z|0{42$h}uBbfK=MELPLox)!%Rx<3|8`RQg9iMEL#i(^9hNqP7JB2#k#oI+ceUbNP@ zh%8NrjxQi8xKpiyR=RF$0ZaSRh}fqitFL+ zkQL~<7toRO8^#U60f5o|J_+x?NWt_kWDU&QzAAG#3p?39ErMRyte`4UK{_AU4GWScv`&O&^63V99^Yh+Rk%hVsvSp9-5feW-#+R!p6hm7sJ$ z!4Q0n|8FMqU0LLfHL@~OC6n=c)mO_N@7QY=h93M9=Y9sR1h`?tGa^2~IUKDT&(c8r}@93vQiaR-58FJ9j4-r&U+F1rVP=3{>p|dn=a3R{er2eVK}6V3mu&pW8h~jp8X9RouCbA={(9FIp6lp!aGFR(Ew3WWa7M zM;$z7$IF}9rl0632n=X?!BZS z`SM_cP@Xh(lq56q1NU~sSZH3p^(TA(4soB~o{G8OzD51yo9x-9+e&mWy zO+|})T5_|VpP%QkTLhe*h=_}8Yh9hEt7!{W7cPHHpQ#D09<!Wu*Pjb5WrEPp-Xi z?s_-<6MHYodws%O|MP49x_4REqWtgpz0u{&REmCLU9$04zu1+QKfGf-kytqd zohD+RFh*cLU&s%3kZ0^S++NjeCd!S-#n4ih8C&w3j6T4vH8ofTUg7%>d8D(>Dmc$| zsfB^t(6jY_WaoltaN=mOKCTUz{)*-=PXuiY$ZLYgrQco&Ihvf_3F^~YBen79Rz_5j zLQ=vq*P@)Dlk|H09PV6IE>0paxTjtW(gObC`F-+C%GHI5KBqpwK}y{ZZOhaLAx4ID z{R+kj?%$om@>Qd`x7YQb0C%|@_#*jW^-``q<@dOAn=V7haps_O9325^I4`Lw{~mi< zK!5q}uei)8F7QdaTTRmc#f0Q!{)>R*pR!&C4O`vONxj@0^LiQ@kl+sMl61wV6oZZ7*sSV&(i zU4Eb~Qgs{m{hSY;u{gTiHK!G5&S)R9R=}}}Oy}ERx9kW)%+%mMZ;kb>Y7U1S?%(OU1 zUEeDbYg+52y;s8OC$#xful?HGQ;+qLGI~K)S4_JrUiq39-R-Qpc{eSLhh{*91{b#S z74XuV67DOM5OMNP`*WYBxIX%?15)+VO^yFH)hhkAMl^MTON9f*%nNC5Ct3T86x>qEth&T;oWn*Gss)$!;GAM$ge-1|xV=R6C3>z&%oqLu)wD;|wB z)&-MZajBM(=Rmm~hA~9t_j2Q_JboNx=if+FVqk=bh=8BhXFf&8n_tdrOCvo_&#Sdh 
zI^&lTk&>64b9zrc!?V1qJ6AC2Xe;64Z4L00wq=-%XoFQfs16eUR&~_MdJVaWNd&YgO;KgHv>`_OCJcdqqJ0BA zBnUzmWk_j52*l;^KQ}}G8XCHx)kt~$-@8z%+R9ZNAfzq7mP;cv0*&9S=SX1Zz97}} zLCf;uj8+~%%A4+ZMz@r7fAz4zg6Q^VJK}kD_TQfS7f$4M+^cpHbsFr~;2XOX1cF3cz> zPDI)7q3wX@N1R7LVL0SgtR+h?+Wn3Nj8re`YM-bhrqg~L=;!0zG%KK$CRVaw=WhZB ztw5|gFL4hhV1n2n$AEY;NhL1_}8Vbjc<`DN}s zhX>Uw53;rg@4W^ca6EKS2ylnfrAtCF&vkWsW>Bx`!z?#3sm zv1mBbMIsoJGF$q>K+nL2LSmqg)g8xY=x%`_I50e`7X+GsRfp^klpv+=r;{#^%ScTx z{ZChPXZw{T3HiZxAlkRZHOuxdY`U1@)>T%M_6&~NPK6P(4p*7edRaoM?&)zGcQwt` zq?E+OE)Yw18+E-M7SB&If+<+vcG3?j^|sh4nyBvWk)+cm<%@qb<-9g|4I}X z0l)rqw{SFqY?KBUR3FyY7h|uq1pO?nrfut!vxKwlO(#BMxXbW7s`#*=Y0 zqd;;Qsa3{?U2b+baEx0-lKA+|;=?n~wkJJtMy0Z|mWfKvcN}w-ZHW47$8M3*L3_ zq@A=3DQH}r13{Ynm3`8O+d8kEZuxgLyW-*OrKEyCSL{+D0cg6pY=f3j*Ba55PmR|@ z4gHn(p(F3zjxDHx$)7tAv0zcbf5YLhI0XC-AJbbSQiiHJ%S`IrR7Yy^ok?R!_}nJd z*qb}w($17K<=LkAAdZm#$zfi?Bs(BIO9An;Uq;8tp$`q3m)|e{6C(-bnQ)yA9$j_J zw;ReEOQLIMy@-ab?VAKRlm|(wK=C*o*`kao4oo-iBL!#|PJ69Nwr$b+mx{`OR&VFl zlqYVy7Qdc2kF9VDBuSI_14wcyWI%<#^P#&dpX7o6!0h!!#u0~1qE1$7PKsUx0_GW` z@mp>Aig_hU=b9?k0sor>w)ULW)|I3agut?Xmwnr}$d^@u39^#w-@PCsCieq}ygAxA z4P>uyY%5>mKFtmd5cP|z4OHLl^i5M!qG8(D6AA-7Zbj7}Dhro|-}QTS_(T4dYi;o_ zf^419nmS-^`sEcSY*_!l0R%y){yhZrI{^Y-g0#{EHMqRzdRVesehbe;De;D2?KFsv zqn&*>wBS&OPXPEKA;MmT8Pqb}4tI-3%GNCNlc#HL6xBK|xA9DIb@Qs9;820@_z5j7 zEnc1F@7dqrUy`56G(|_PABgJBb|z7bi-11WZ{}LU)j*zV$zC4wHD{7jhZ$t>W@wNl z23LDU^fk0t;6IO1pb_Mmf{vod{=cRbXy5&F|HcSN(w^JP#nEM495Ij1&i2~(pATkx z{kHKxA6^DdNg$=}7QaHmHtBoZULsUGWrEeYlz@Q;%3vhon6R4O_g-}A9~!b52NKKr z8#A@%o@9V72oYf2$Xz?5@3~pB@B{y+>zBtzi}X?GYaSaH5-|?SL72dD%m^I^X6DAE zoWZw_1*$M@Wo@nDIL!(Wiv`Amb7Ez54vjeT>RlZ_KGDbw%*MWL6|q?P+;B=P&-=BX z(Y-@u_n+CkaDm1{&FVjA^q=NeJUv)GUWrccbzkE9_Ik0jFFwBK|ESIzo8fyPJewi1 zpP0^+gAtPUivgE(9_fszsHhl=fE=IzHi{Fob;NBU>aV1ufEO-EMg|VhEhA4Nz{L!o zF3nt(Py6{>Mm4^~EMBAW0(spYj$`k+&9E5lYg^bcd+$w`36h*8v$i)7YTZ&!{f$Yc zGM3@#HW*xkr(!s6UjVXYYq%BA`DfTmF%wjyy6+DHA#(vr0}DuWq}-I%QU#Ql$23V zd*3ixAKVE3O50+94;LVQh%Lr@j-^}L2M<&}wKTM<5-klkov>Qlpn=8Awb zwX&1t^BtO(3Gj0;Aft%u)@y8I`Bk`jqaVV3Iat7})%>ORjvA`_MQLI|rw;??(ZYGhw>2+T zwwF#|wCyI4lU^P_vfpO?B&%lYmqIUS^%vB-P;_w?w-={PB9zU>S<06JeUjHkmLx(3 zw#vpL@fWR1S@zY85E#HU|LRqqQ3<_R`Gt(N#tb)-rq-Wa{hnL;?bF`UUeIH*9%=g9 zm-#_)?Os{c;TK91K1WsMYr4I!{uhLW0gD6b_CgqNmNGh7EiH@{MrwOH48Hb`Oz4z% zYZt~4Qv{XNQst2q>$LsJ>c3Wx`Bc;rX7~kmPM))^u1*)C9+|#FfTXTLDMMThLW!I< zuVO1ZPEAgHCP6?_*WHsjG-b6^F#%Jve+}l3l%YKbj-2(Vp66&9cV^P96HjVHP*9*y zVNB!kWlKKhDOB{@9eswEciLGx>Z&WD!`(8{ogG1X>yY#0EIn?5J5pr7hr5MI!}1GB zc@N9c?#*9c=8vH&Fu-d0;$n|`Dcl-nTFTvX3;X?USOSxt6#}ZFv+LqGG_=oFL9E8( zbV2%;t<7&iVa7Sy!#%n9ylE&n*|Lnd=cI4K>(Co<&HD_|v1D4Ys50@f)%M0Je6fBc zg@n&{k{=d$S!i!963Zi6nNZZlY8)NcvusG`;Bhty^cwQosJjmh*huN@v-eP^CMGWNGfJW5wvRI_5r z&&9@1r7*E66F&B;RQ=7{#X56riIIql2DNRrqc6uJvxn?+(!f~4F?-h*Syu}8c55MvJ@`A+BGd)6V_hF zCOKrhKXw393}+|aJRS9O%sgsJ$-0DAR?q@KA?0a$ZX_iE z@h#~q=wU=dVK{$T89O_X-zf&{md@stNNKE{CEX+?e38!dtBt?|)N0+W4H9Mc;&DUkG#jE~XEmw9_2v2G`3l|d|4;ucrF^=b} zE`8`j0hnszcE|NJrs+qS!^gAt9p+ zSyfp%{2cel4jey+OSI?^!Z#^l_*rZ!!uk{$<~b1P=n9I@30&t3>nlR{2gSwqk(q8}JoNJ$xM z!zX!&z5rlpslfq%QdAriH$1I3iY(LL_GE27oUSb8!v_tNlU(WZ;Yy;W1}K6G+WKcb z_}i#EDMDK6&2{JydAXIJ|4PstIwFr&Ewh8_Xc4>my$*7t$`WYB7;!&b3`jG@=j}L-`$|I&rliPsQFbYO2v|{e4_epBDLXwgQMWoCQ$o!kAbat+ z24r(ryQ{M1z~aP`(?<(WVDCaa3AXGhO6; zdlq8%O+5hA$Rh85_wL2{&!zyiqW4~IN~kWM>!JBURbAz`K_-5O0PIr35-OVJ2o?qD z)YSofr4|>~2(--8uat;Znw`!1Aegsc*W0r;nH&oM;Lwkj{z14=2EL2SRzCENd2m!7 za(RjxHsDpG@W@mqFMAEb6N4dIHM^ewHn}g)j&WHw6tJ3mp@Cu|reU?mEo5iTpjEQo zgr|~VdKJ6#B-KHp{vgLLRxqKntw2zYucxlw8TuxSGwk?HTn5F6T z?i|K5s0y2K@W0XhKo#=nS?WDi%3t#$O9n-E^=I!^gjAZFmFavz?DK_-0T^4oSkjDm 
zw#$1Ls5G2=c;VDEzyluqnJPn~owu@O$Mm}LLXJ34a+1(N@xj#H#E57eLGrd6tv9JF zoh#|JtkS_oe(QJneeU^jL6Z?#TumfDrFxLNq^GEEYhl++-{rcKT)=Yh%h7&79A;|T=Qm!r z+sS7yj=B3xGr^Tp(V*eZ%aNA}0I`UZ5!Ixx9EOOGKp(mPpk>Ru8)UEuyFm{4Bty(PN`Q<~kY z)oHhwr$2YN_5#!Vz@FZ1L;QN;ll|6bA6YP7)C{MhKDt!xRS-x72c`_q6;6X3Yw#&p z=Er@WNb)u$EJaBxFCBqy-EMhZZH6O{$mp6Xjt!w3d?OQBW(#l!#t|W`x}sD*7jm_!y~wwJ z{^U#SGfH?BA_U8W{@bcK(Oa?OBjbu7$dkM#r|o^``4&81yr-?DDcbH6QktqNY~Ex4 z{x4jauw9}ZX83N8nZGmjlwK=BhDi7PFUSd?O#A^m%i?%` zJ_xw=(5{LHN$k3FWbW*@fx-e#EWvcwb87kBSY`nWuJO+#7_bhII$s{xk#*9mim1s3(*!HqE*etweg-l|8 zhAQ~9b~~_o39kNCNqM;lslw<=O!=GsJ5r|;o<^^V(9eU<5M(m z_>(NX!vB$Fn~Ox9n}UhCsYKWKKKD~2NABqMt9npUYb@6z)1Bk6I{~Tqa0FUn%IJK6{q~9#QDp&MLsStLdQY*ZSC~ z_LSvje$3HM2d1R^cE^@3_2*q{jQ%3jF?$>4I~!w#CqK+23|tJ3h0)IyvHIM_-06I6q9w-WV0_w)a4Non$uR z&e*jw{TUe1vHC$oefp5j$}~?$=5AduB|Ck|6P6sFIwD&oYla(XW!Obf0Rn`*v}pi9 z&CO$0WHQglTkerZZBE7S>hK=oFFg!Ay-ec$L&dKR@qrlm5nUeKt6rXzq+fL;_im7V zw#vIdQDRbt|ETy$!c}95?@OK2nEr!2@}~pM+keL_ry`Zi53^pVZ;zjMbVp6DbHYo( zKug(qaiyUkqt;(pkSyLvZxwSkxpezJJ#kh;L&x%}Z-zlBgl)v4zG9RTT{4y>rc^UhXM5h20t2$MjGQY2KSkQm@+CYsDoil4K!9yd8Pmu9f|$|&Hc*G7^F zj@wR`P6{`Xl}m5sgHGP9UP8~NMo-)L+N4pY+0aB=hc?92Q$!j%Gvt?KN! zGzA0eaaxcj`>_5x6=ZMxpSr#>EUqS4a}pvDJVAp68C-(9OM<&waCaSKNWuhncL^5U z-4Z-FgF6IwXK?o9`*!!`iD>6FDEk=vOmf#CTyjk-#8oN!O z-$y!5ollOb&LJdI0&EEoJlqdlb;O(-ll=vQpA$HV?AO-bX&5p%0J} zw+2_&bVA`HOvG+J0F`;}_5k-=Mft7)vZbK)up~C+JhE`cHDbK#?^nM2jgrOX zl$S2PHNohDvd||&Wp>g_G4Z-RE0q&~BUO))$TYM}pkL!eU(Lsf%vP%E4YMJ%U~|I+ zASL0Lv4}Akt_1@5Fm^NkXV?WS@t=K?LvK%KN2lt0Cd8>|=sgDSf<=;G9>#D8P-6^X zVEFt77#Pnx!?Psp6g^6hZmF?@B{(!kA&A`*^8g*VodPrY9}V{x@960BpS}E?PH>Ss z_p@KZbMzzX*tD`@zawY{B}^+nt+v!1-|PBWvNXOboD%aYW$#C^Rq8ck>i6C-sM>Ok z2-auKovShz^)(t4OK%7Pwyd>0J_{MSJb5UQH?NKJQk#NJPVX@EKM>+~;hn9`2>6s! zCYQo7L`11G3<9|dNA`&;Pqh_>%ap9F6F3Hh3j$eeNJ+G8XsTfydcUEl9bwiV zm^w+xLIU<5ywp57=Av(NO8(`0Lbr7gDv@yakzq)RJm+5+###3w=v3U5=tCP8xeAeJi-&a(O#RPqpdgQA&D-{Nrk?_$JKp4T9VZJ zO(y|Q1=+134`o$*-33o%JjdjfB%@uRn-$4Rl#=GFeZIo!bK+} zya?UhATvkvl-4*T^zWU3i98VV)pIi%q&t}mwFR|UqxJ4McedVJ5W;WLq}8MGzY0~p zn#Ra3wbKx6X+C#ESZc0b>h8wl#-}L;+C7itRBt3Gz(8ZB|MeLFoaE7|*C7b&449>*@3rcXa}^iAp5?}h)+tldl=D}+;CVo*JOXKBt` zbBaY-Xm4pO_jb}JnXr{%xCqT~Q(&#BzX)}N()LBjn8>i`-k0LkS!oLFc8`yss|Vb7 zpwC=35)%L5JP3E9e8X%F=^oA9!h-TSHdLsnpxgjbZC&jpaZK4X0DigppzchE#%uXy+xD(q8KY@E0`xxMMW8MiVci>+N7AS-uG zbdyTSB!StT7Aj|sep-Dv=R@)r6?S|FvhM1UcG4}Fm-zPA;N0nv6wW0!3UU=0+ z$XT5y@j^W4KVhx~YMQZ$oHs*uQEv4{jRRygrHoW>`Lrfh6R15W8jCa6 zOYL5`mx2}F8ViJ-9?p$Ld0az&BkJUJRJq^^pVO1Cc3sRc?dr$Xh3hBH5O)J*$LnRy zAwsbMmzg=6eNycBNz>1nj`iFwMfcB=bU$of+4_Q4`3B5DVro&HYa@&)Ja&eCYgbpj zY*?EqYqz(WgYfH`*K8snV)lPMre~^zWD6RUh zGp{dmFaVth2vz;-*Mlx!>Kx_vqdDeLqGKVOHU%xbE2c|5GMm1CdNd_D z9G^pI3kj^dSzsOnY4V9>r5p^~x)B+8T5||L$!<0*{lofW7{1`Zl;!)^Nzpo^G8#BuUU}{glc52>cwxb8kaywRtI80 zl?cNOe!c|l)A*1-ANc&^+i?ZrQ~TOV8fCAKd+bzRj8FA`p+ZIz9Fk$_?e6Y-w&sKLO#r=)#!{o)%v! 
zn{sg5l?Xixv?F0EER;DT6&Rf3@fmffSmSE1{HCM#Non`Awkf=_`&0~R?L`WKsc<@)^vm(6G&0PBZ@$X3Rj`)nmu+S0Y55GQ z`p=z}V(#qF01ly4Y9=PMq9k>agCPZqSZoQAsCK7j3XPr%b>>bS$!)c!q*F&kZN1P0 zrg|kID$cbnhCp+zc}ZE=yKv?*qxMq;(y3oykjVcJu>6dhJ@uxBos@Kxl$^&2DE||X z3Vp&{b3CTupsMa9^eS0VZ*sz0*7M2qa)p=7NGWnH5JcEh#|B3a745^0eq zAc=LG_52e)6(5`?SpI#;JmlWwC#HqQ`zM(oKcZ9za?{r+ zgr`%1E;h?od(pH#{P3~MDtq66UAXfdqR(EdAZvxdl91Gd3zxm%j*pAy_s<6El*C=e)+Tqwyb5FGJSCSZ- zl&4{!J$CcwGM?r3H^6XpAwuR6o?!7jnS{KlGsjqVXJMseKN&WUjcleF6iAG$MRF9^VXDBE z-aVXvAi*0tMAvn+o`WCBG8E&}0`0i_Y24y#@!B+B->+#bh+-Ev>XFwyoD|@jS-+XP z-*}@S(ba2^S{~KlFm+Q=^q^$wWDb6FjCx#4p1`qJDD*0rrru!WM;AyZ!F5xqul`qT z>v?JjgD7{Cp#d8#4A70Iy3b1ulNb*}f0@Qh!#-SZm zvS%F@QQ`@uO{auMwDGCin`}TlzEh<{J5@{na8u3bB;@-@|4JGQCfC(L(6uBkjsa=2 z)8-+f=kbnTVLdQ(U(@F0c;8<~;2Vwi84yM1-?R6m7wop4W8vaYzIAG5oO=-F=l?L@ zuH5)Ze1X_ffU!RTAwPYeAFw=}{NlCg)6A5D@>WYLV9#^hJh+9*^f4KeZ(w-;giA#g z{yx)v`*Wt!`j_$Y=C&(X?kcO3rMu&bEEforMP~mDZ0+4-Q#&ap>avulj#+W;%sB-l z8|S!~b@O-f4)sTdFC8tA4Jv4rvxqo+`MJV*qQ^x3>UU9L@!vz*hp(Ecq2FtMr@dAh zgk?&tFDD%-uwqk|QCe+~cHHBR99z=bniz?i$csGS6*Tu)j1JXy$o4 zOf?FVy#9?W_U1J{{wn2D(5MPsW)44%Hrf=S&|xNcm?E$IP4%t#@@oqXwy`Q2{)O`m z*blWQgo|USBQ)xNfY5eecK5y=#p+RGQsx6xM!SITc`{LPsspED7D_PccrbFXqJ)^< zk6rg$ZG2IXLEc*PuzGkVRo20VkpyEQr+$kMjN==l{r*PDZ^$iQ__#7Xw^ZZp05E43 zbjSSQxmbwpRR)OI+VCnm^_8v9Z(JBYoIZx!eE-irw%W1s|H;c}3&i#w6jRlu1v`z? z<5dkGXZyteE*GA_*ZY2lrGN55UK9rV7L`(z65#GUpUC52NL97|LC1>AnsBiX9&{?i z+}5zyDfizw8t3+hVrz(r)lVbjN!cGC4;CSM@>WPMkGB>|;rVfD^f0QpT3fe+c^spU zl>97493e4zj3!q5VZLv@+4~GAoU6mY+EuD*FuYv*J^kU^4^(dUi>dvk(B+Z4=<)Pw zfi1{z`4;cig2m)g1;6yHE#;m)7y2Zf_Q1f$9XwV4GK1Tmc=7Sl^~PGsc42mp{yI!w z&7&*B>aT(uN_1aq*P4=ce>iC~hAd9(=|Js=6RW6f*O>K*{0vx|8|C--AeD-Lz#%x) z^Al5-14#5GO-*x>!)oEwzY|XBMmpF;2?s?RZ+$fDYXV2RPXp`(YGEPdA&HOZOai?*&?lzte*dYvn-S#EW5APHNxJ*Wq#COLd476tJpP0<4x`pZ19vyFp;_?T! 
[GIT binary patch omitted: binary image data]

[SVG markup omitted for the new schema illustrations added under doc/source/_static/schemas/
 (file names and added line counts from the diff headers):
 02_io_readwrite.svg (+1401: read_*/to_* I/O overview covering CSV, XLS, PARQUET, HTML, HDF5, JSON, GBQ, SQL, ...),
 03_subset_columns.svg (+327), 03_subset_columns_rows.svg (+272), 03_subset_rows.svg (+316),
 04_plot_overview.svg (+6443: .plot.* overview), 05_newcolumn_1.svg (+347), 05_newcolumn_2.svg (+347),
 05_newcolumn_3.svg (+352), 06_aggregate.svg (+211), 06_groupby.svg (+307), 06_groupby_agg_detail.svg (+619),
 06_groupby_select_detail.svg (+697), 06_reduction.svg (+222), 06_valuecounts.svg (+269),
 07_melt.svg (+315), 07_pivot.svg (+338), 07_pivot_table.svg (+455), 08_concat_column.svg (+465),
 08_concat_row.svg (+392), 08_merge_left.svg (+608)]

diff --git a/doc/source/conf.py b/doc/source/conf.py
index c12c148d0f10d..a95cd4ab696f7 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -232,6 +232,7 @@
 html_static_path = ["_static"]
 
 html_css_files = [
+    "css/getting_started.css",
     "css/pandas.css",
 ]
diff --git a/doc/source/getting_started/dsintro.rst b/doc/source/getting_started/dsintro.rst
index 5d7c9e405cfc2..200d567a62732 100644
--- a/doc/source/getting_started/dsintro.rst
+++ b/doc/source/getting_started/dsintro.rst
@@ -444,6 +444,7 @@ dtype. For example:
 
     data
     pd.DataFrame.from_records(data, index='C')
 
+.. _basics.dataframe.sel_add_del:
 
 Column selection, addition, deletion
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst
index 34bb4f930f175..a2f8f79f22ae4 100644
--- a/doc/source/getting_started/index.rst
+++ b/doc/source/getting_started/index.rst
@@ -6,15 +6,666 @@ Getting started
 ===============
 
+Installation
+------------
+
+Before you can use pandas, you’ll need to get it installed.
+
+.. raw:: html
+
+        <!-- raw HTML omitted: accordion card header "Working with conda?" -->
+
+Pandas is part of the `Anaconda `__ distribution and can be
+installed with Anaconda or Miniconda:
+
+.. raw:: html
+
+        <!-- raw HTML omitted: accordion card header "Prefer pip?" -->
+
+Pandas can be installed via pip from `PyPI `__.
+
+.. raw:: html
+
+        <!-- raw HTML omitted: accordion card header "In-depth instructions?" -->
+        Installing a specific version?
+        Installing from source?
+        Check the advanced installation page.
+
+.. container:: custom-button
+
+    :ref:`Learn more `
+
+.. raw:: html
+
+        <!-- raw HTML omitted: close accordion/container -->
+
+.. _gentle_intro:
+
+Intro to pandas
+---------------
+
+.. raw:: html
+
+        <!-- raw HTML omitted: tutorial card -->
+
+When working with tabular data, such as data stored in spreadsheets or databases, pandas is the right tool for you. pandas will help you
+to explore, clean and process your data. In pandas, a data table is called a :class:`DataFrame`.
+
+.. image:: ../_static/schemas/01_table_dataframe.svg
+   :align: center
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+:ref:`To introduction tutorial <10min_tut_01_tableoriented>`
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+:ref:`To user guide `
+
+.. raw:: html
+
+        <!-- raw HTML omitted: close tutorial card -->
+
+.. raw:: html
+
+        <!-- raw HTML omitted: tutorial card -->
+
+pandas supports the integration with many file formats or data sources out of the box (csv, excel, sql, json, parquet, …). Importing data from each of these
+data sources is provided by functions with the prefix ``read_*``. Similarly, the ``to_*`` methods are used to store data.
+
+.. image:: ../_static/schemas/02_io_readwrite.svg
+   :align: center
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+:ref:`To introduction tutorial <10min_tut_02_read_write>`
+
+:ref:`To user guide `
+
+.. raw:: html
+
+        <!-- raw HTML omitted: tutorial card -->
+
+Selecting or filtering specific rows and/or columns? Filtering the data on a condition? Methods for slicing, selecting, and extracting the
+data you need are available in pandas.
+
+.. image:: ../_static/schemas/03_subset_columns_rows.svg
+   :align: center
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+:ref:`To introduction tutorial <10min_tut_03_subset>`
+
+:ref:`To user guide `
+
+.. raw:: html
+
+        <!-- raw HTML omitted: tutorial card -->
+
+pandas lets you plot your data out of the box, using the power of Matplotlib. You can pick the plot type (scatter, bar, boxplot, ...)
+corresponding to your data.
+
+.. image:: ../_static/schemas/04_plot_overview.svg
+   :align: center
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+:ref:`To introduction tutorial <10min_tut_04_plotting>`
+
+:ref:`To user guide `
+
+.. raw:: html
+
+        <!-- raw HTML omitted: tutorial card -->
+
+There is no need to loop over all rows of your data table to do calculations. Data manipulations on a column work element-wise.
+Adding a column to a :class:`DataFrame` based on existing data in other columns is straightforward.
+
+.. image:: ../_static/schemas/05_newcolumn_2.svg
+   :align: center
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+:ref:`To introduction tutorial <10min_tut_05_columns>`
+
+:ref:`To user guide `
+
+.. raw:: html
+
+        <!-- raw HTML omitted: tutorial card -->
+
+Basic statistics (mean, median, min, max, counts...) are easy to calculate. These, or even custom aggregations, can be applied on the entire
+data set, a sliding window of the data, or grouped by categories. The latter is also known as the split-apply-combine approach.
+
+.. image:: ../_static/schemas/06_groupby.svg
+   :align: center
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+:ref:`To introduction tutorial <10min_tut_06_stats>`
+
+:ref:`To user guide `
+
+.. raw:: html
+
+        <!-- raw HTML omitted: tutorial card -->
+
+Change the structure of your data table in multiple ways. You can :func:`~pandas.melt` your data table from wide to long/tidy form or :func:`~pandas.pivot`
+from long to wide format. With aggregations built in, a pivot table is created with a single command.
+
+.. image:: ../_static/schemas/07_melt.svg
+   :align: center
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+:ref:`To introduction tutorial <10min_tut_07_reshape>`
+
+:ref:`To user guide `
+
+.. raw:: html
+
+        <!-- raw HTML omitted: tutorial card -->
+
+Multiple tables can be concatenated both column-wise and row-wise, and database-like join/merge operations are provided to combine multiple tables of data.
+
+.. image:: ../_static/schemas/08_concat_row.svg
+   :align: center
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+:ref:`To introduction tutorial <10min_tut_08_combine>`
+
+:ref:`To user guide `
+
+.. raw:: html
+
+        <!-- raw HTML omitted: tutorial card -->
+
+pandas has great support for time series and has an extensive set of tools for working with dates, times, and time-indexed data.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+:ref:`To introduction tutorial <10min_tut_09_timeseries>`
+
+:ref:`To user guide `
+
+.. raw:: html
+
+        <!-- raw HTML omitted: tutorial card -->
+
+Data sets do not only contain numerical data. pandas provides a wide range of functions to clean textual data and extract useful information from it.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+:ref:`To introduction tutorial <10min_tut_10_text>`
+
+:ref:`To user guide `
+
+.. raw:: html
+
+        <!-- raw HTML omitted: close tutorial cards -->
+
+.. _comingfrom:
+
+Coming from...
+--------------
+
+Currently working with other software for data manipulation in a tabular format? You're probably familiar with typical
+data operations and know *what* to do with your tabular data, but lack the syntax to execute these operations. Get to know
+the pandas syntax by looking for equivalents from the software you already know:
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "R project" logo card -->
+
+The R programming language provides the ``data.frame`` data structure and multiple packages,
+such as the tidyverse, that use and extend ``data.frame`` for convenient data handling
+functionality similar to pandas.
+
+.. container:: custom-button
+
+    :ref:`Learn more `
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "SQL" logo card -->
+
+Already familiar with ``SELECT``, ``GROUP BY``, ``JOIN``, …?
+Most of these SQL manipulations have equivalents in pandas.
+
+.. container:: custom-button
+
+    :ref:`Learn more `
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "STATA" logo card -->
+
+The ``data set`` included in the
+STATA statistical software suite corresponds
+to the pandas ``DataFrame``. Many of the operations known from STATA have an equivalent
+in pandas.
+
+.. container:: custom-button
+
+    :ref:`Learn more `
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "SAS" logo card -->
+
+The SAS statistical software suite
+also provides the ``data set`` corresponding to the pandas ``DataFrame``.
+Vectorized operations, filtering, string processing operations, and more from SAS have similar
+functions in pandas.
+
+.. container:: custom-button
+
+    :ref:`Learn more `
+
+.. raw:: html
+
+        <!-- raw HTML omitted: close cards -->
+
+Community tutorials
+-------------------
+
+The community produces a wide variety of tutorials available online. Some of the
+material is listed in the community-contributed :ref:`tutorials`.
+
 .. If you update this toctree, also update the manual toctree in the main index.rst.template
 .. toctree::
    :maxdepth: 2
+   :hidden:
 
    install
    overview
    10min
+   intro_tutorials/index
    basics
    dsintro
    comparison/index
diff --git a/doc/source/getting_started/intro_tutorials/01_table_oriented.rst b/doc/source/getting_started/intro_tutorials/01_table_oriented.rst
new file mode 100644
index 0000000000000..02e59b3c81755
--- /dev/null
+++ b/doc/source/getting_started/intro_tutorials/01_table_oriented.rst
@@ -0,0 +1,218 @@
+.. _10min_tut_01_tableoriented:
+
+{{ header }}
+
+What kind of data does pandas handle?
+=====================================
+
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
+
+I want to start using pandas
+
+.. ipython:: python
+
+    import pandas as pd
+
+To load the pandas package and start working with it, import the
+package. The community-agreed alias for pandas is ``pd``, so loading
+pandas as ``pd`` is assumed standard practice for all of the pandas
+documentation.
+
+.. raw:: html
+
+        <!-- raw HTML omitted: close question-list card -->
+
+Pandas data table representation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: ../../_static/schemas/01_table_dataframe.svg
+   :align: center
+
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
+
+I want to store passenger data of the Titanic. For a number of passengers, I know the name (characters), age (integers) and sex (male/female) data.
+
+.. ipython:: python
+
+    df = pd.DataFrame({
+        "Name": ["Braund, Mr. Owen Harris",
+                 "Allen, Mr. William Henry",
+                 "Bonnell, Miss. Elizabeth"],
+        "Age": [22, 35, 58],
+        "Sex": ["male", "male", "female"]}
+    )
+    df
+
+To manually store data in a table, create a ``DataFrame``. When using a Python dictionary of lists, the dictionary keys will be used as column headers and
+the values in each list as rows of the ``DataFrame``.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+A :class:`DataFrame` is a 2-dimensional data structure that can store data of
+different types (including characters, integers, floating point values,
+categorical data and more) in columns. It is similar to a spreadsheet, a
+SQL table or the ``data.frame`` in R.
+
+- The table has 3 columns, each of them with a column label. The column
+  labels are respectively ``Name``, ``Age`` and ``Sex``.
+- The column ``Name`` consists of textual data with each value a
+  string, the column ``Age`` consists of numbers and the column ``Sex`` is
+  textual data.
+
+In spreadsheet software, the table representation of our data would look
+very similar:
+
+.. image:: ../../_static/schemas/01_table_spreadsheet.png
+   :align: center
+
+Each column in a ``DataFrame`` is a ``Series``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: ../../_static/schemas/01_table_series.svg
+   :align: center
+
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
+
+I’m just interested in working with the data in the column ``Age``
+
+.. ipython:: python
+
+    df["Age"]
+
+When selecting a single column of a pandas :class:`DataFrame`, the result is
+a pandas :class:`Series`. To select the column, use the column label in
+between square brackets ``[]``.
+
+.. raw:: html
+
+        <!-- raw HTML omitted: close question-list card -->
+
+.. note::
+    If you are familiar with Python
+    :ref:`dictionaries `, the selection of a
+    single column is very similar to the selection of dictionary values based
+    on the key.
+
+You can create a ``Series`` from scratch as well:
+
+.. ipython:: python
+
+    ages = pd.Series([22, 35, 58], name="Age")
+    ages
+
+A pandas ``Series`` has no column labels, as it is just a single column
+of a ``DataFrame``. A Series does have row labels.
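+
+For example (a minimal illustration reusing the ``ages`` object from above),
+the row labels are available through the ``index`` attribute:
+
+.. ipython:: python
+
+    ages.index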
    +
+
+Do something with a DataFrame or Series
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
+
+I want to know the maximum Age of the passengers
+
+We can do this on the ``DataFrame`` by selecting the ``Age`` column and
+applying ``max()``:
+
+.. ipython:: python
+
+    df["Age"].max()
+
+Or to the ``Series``:
+
+.. ipython:: python
+
+    ages.max()
+
+.. raw:: html
+
+        <!-- raw HTML omitted: close question-list card -->
+
+As illustrated by the ``max()`` method, you can *do* things with a
+``DataFrame`` or ``Series``. pandas provides a lot of functionality,
+each piece of it a *method* you can apply to a ``DataFrame`` or ``Series``.
+As methods are functions, do not forget to use parentheses ``()``.
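+
+For instance (an illustrative sketch; these are just two of the many
+available methods), other methods can be applied in exactly the same way:
+
+.. ipython:: python
+
+    df["Age"].min()
+    ages.mean()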
    +
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
+
+I’m interested in some basic statistics of the numerical data of my data table
+
+.. ipython:: python
+
+    df.describe()
+
+The :func:`~DataFrame.describe` method provides a quick overview of the numerical data in
+a ``DataFrame``. As the ``Name`` and ``Sex`` columns are textual data,
+these are by default not taken into account by the :func:`~DataFrame.describe` method.
+
+.. raw:: html
+
+        <!-- raw HTML omitted: close question-list card -->
+
+Many pandas operations return a ``DataFrame`` or a ``Series``. The
+:func:`~DataFrame.describe` method is an example of a pandas operation that
+returns a pandas ``Series`` or a pandas ``DataFrame``.
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "To user guide" link button -->
+
+Check more options on ``describe`` in the user guide section about :ref:`aggregations with describe `
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+.. note::
+    This is just a starting point. Similar to spreadsheet
+    software, pandas represents data as a table with columns and rows. Apart
+    from the representation, also the data manipulations and calculations
+    you would do in spreadsheet software are supported by pandas. Continue
+    reading the next tutorials to get started!
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "REMEMBER" callout header -->
+
+- Import the package, aka ``import pandas as pd``
+- A table of data is stored as a pandas ``DataFrame``
+- Each column in a ``DataFrame`` is a ``Series``
+- You can do things by applying a method to a ``DataFrame`` or ``Series``
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "To user guide" link button -->
+
+A more extended explanation of ``DataFrame`` and ``Series`` is provided in the :ref:`introduction to data structures `.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
\ No newline at end of file
diff --git a/doc/source/getting_started/intro_tutorials/02_read_write.rst b/doc/source/getting_started/intro_tutorials/02_read_write.rst
new file mode 100644
index 0000000000000..797bdbcf25d17
--- /dev/null
+++ b/doc/source/getting_started/intro_tutorials/02_read_write.rst
@@ -0,0 +1,232 @@
+.. _10min_tut_02_read_write:
+
+{{ header }}
+
+.. ipython:: python
+
+    import pandas as pd
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "Data used for this tutorial" expandable card -->
+
+This tutorial uses the titanic data set, stored as CSV. The data
+consists of the following data columns:
+
+- PassengerId: Id of every passenger.
+- Survived: This feature has values 0 and 1: 0 for not survived and 1
+  for survived.
+- Pclass: There are 3 classes: Class 1, Class 2 and Class 3.
+- Name: Name of passenger.
+- Sex: Gender of passenger.
+- Age: Age of passenger.
+- SibSp: Indication that passenger has siblings and a spouse.
+- Parch: Whether a passenger is alone or has family.
+- Ticket: Ticket number of passenger.
+- Fare: Indicating the fare.
+- Cabin: The cabin of passenger.
+- Embarked: The embarked category.
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "To raw data" link button; close card -->
+
+How do I read and write tabular data?
+=====================================
+
+.. image:: ../../_static/schemas/02_io_readwrite.svg
+   :align: center
+
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
+
+I want to analyse the titanic passenger data, available as a CSV file.
+
+.. ipython:: python
+
+    titanic = pd.read_csv("data/titanic.csv")
+
+pandas provides the :func:`read_csv` function to read data stored as a csv
+file into a pandas ``DataFrame``. pandas supports many different file
+formats or data sources out of the box (csv, excel, sql, json, parquet,
+…), each of them with the prefix ``read_*``.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+Make sure to always check the data after reading it in.
+When displaying a ``DataFrame``, the first and last 5 rows will be
+shown by default:
+
+.. ipython:: python
+
+    titanic
+
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
+
+I want to see the first 8 rows of a pandas DataFrame.
+
+.. ipython:: python
+
+    titanic.head(8)
+
+To see the first N rows of a ``DataFrame``, use the :meth:`~DataFrame.head` method with
+the required number of rows (in this case 8) as argument.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+.. note::
+
+    Interested in the last N rows instead? pandas also provides a
+    :meth:`~DataFrame.tail` method. For example, ``titanic.tail(10)`` will return the last
+    10 rows of the DataFrame.
+
+A check on how pandas interpreted each of the column data types can be
+done by requesting the pandas ``dtypes`` attribute:
+
+.. ipython:: python
+
+    titanic.dtypes
+
+For each of the columns, the used data type is listed. The data types
+in this ``DataFrame`` are integers (``int64``), floats (``float64``) and
+strings (``object``).
+
+.. note::
+    When asking for the ``dtypes``, no brackets are used!
+    ``dtypes`` is an attribute of a ``DataFrame`` and ``Series``. Attributes
+    of a ``DataFrame`` or ``Series`` do not need brackets. Attributes
+    represent a characteristic of a ``DataFrame``/``Series``, whereas a
+    method (which requires brackets) *does* something with the
+    ``DataFrame``/``Series`` as introduced in the :ref:`first tutorial <10min_tut_01_tableoriented>`.
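+
+As a minimal side-by-side illustration of that difference:
+
+.. ipython:: python
+
+    titanic.dtypes  # attribute: no parentheses
+    titanic.head()  # method: parentheses required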
    +
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
+
+My colleague requested the titanic data as a spreadsheet.
+
+.. ipython:: python
+
+    titanic.to_excel('titanic.xlsx', sheet_name='passengers', index=False)
+
+Whereas ``read_*`` functions are used to read data to pandas, the
+``to_*`` methods are used to store data. The :meth:`~DataFrame.to_excel` method stores
+the data as an excel file. In the example here, the ``sheet_name`` is
+named *passengers* instead of the default *Sheet1*. By setting
+``index=False`` the row index labels are not saved in the spreadsheet.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+The equivalent read function :func:`read_excel` will reload the data to a
+``DataFrame``:
+
+.. ipython:: python
+
+    titanic = pd.read_excel('titanic.xlsx', sheet_name='passengers')
+
+.. ipython:: python
+
+    titanic.head()
+
+.. ipython:: python
+    :suppress:
+
+    import os
+    os.remove('titanic.xlsx')
+
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
    +
+
+I’m interested in a technical summary of a ``DataFrame``
+
+.. ipython:: python
+
+    titanic.info()
+
+The method :meth:`~DataFrame.info` provides technical information about a
+``DataFrame``, so let’s explain the output in more detail:
+
+- It is indeed a :class:`DataFrame`.
+- There are 891 entries, i.e. 891 rows.
+- Each row has a row label (aka the ``index``) with values ranging from
+  0 to 890.
+- The table has 12 columns. Most columns have a value for each of the
+  rows (all 891 values are ``non-null``). Some columns do have missing
+  values and less than 891 ``non-null`` values.
+- The columns ``Name``, ``Sex``, ``Cabin`` and ``Embarked`` consist of
+  textual data (strings, aka ``object``). The other columns are
+  numerical data, some of them whole numbers (aka ``integer``) and
+  others real numbers (aka ``float``).
+- The kind of data (characters, integers, …) in the different columns
+  is summarized by listing the ``dtypes``.
+- The approximate amount of RAM used to hold the DataFrame is provided
+  as well.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "REMEMBER" callout header -->
+
+- Getting data into pandas from many different file formats or data
+  sources is supported by ``read_*`` functions.
+- Exporting data out of pandas is provided by different
+  ``to_*`` methods.
+- The ``head``/``tail``/``info`` methods and the ``dtypes`` attribute
+  are convenient for a first check.
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "To user guide" link button -->
+
+For a complete overview of the input and output possibilities from and to pandas, see the user guide section about :ref:`reader and writer functions `.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
diff --git a/doc/source/getting_started/intro_tutorials/03_subset_data.rst b/doc/source/getting_started/intro_tutorials/03_subset_data.rst
new file mode 100644
index 0000000000000..7a4347905ad8d
--- /dev/null
+++ b/doc/source/getting_started/intro_tutorials/03_subset_data.rst
@@ -0,0 +1,405 @@
+.. _10min_tut_03_subset:
+
+{{ header }}
+
+.. ipython:: python
+
+    import pandas as pd
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "Data used for this tutorial" expandable card -->
+
+This tutorial uses the titanic data set, stored as CSV. The data
+consists of the following data columns:
+
+- PassengerId: Id of every passenger.
+- Survived: This feature has values 0 and 1: 0 for not survived and 1
+  for survived.
+- Pclass: There are 3 classes: Class 1, Class 2 and Class 3.
+- Name: Name of passenger.
+- Sex: Gender of passenger.
+- Age: Age of passenger.
+- SibSp: Indication that passenger has siblings and a spouse.
+- Parch: Whether a passenger is alone or has family.
+- Ticket: Ticket number of passenger.
+- Fare: Indicating the fare.
+- Cabin: The cabin of passenger.
+- Embarked: The embarked category.
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "To raw data" link button -->
+
+.. ipython:: python
+
+    titanic = pd.read_csv("data/titanic.csv")
+    titanic.head()
+
+.. raw:: html
+
+        <!-- raw HTML omitted: close card -->
+
+How do I select a subset of a ``DataFrame``?
+============================================
+
+How do I select specific columns from a ``DataFrame``?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: ../../_static/schemas/03_subset_columns.svg
+   :align: center
+
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
    +
+
+I’m interested in the age of the titanic passengers.
+
+.. ipython:: python
+
+    ages = titanic["Age"]
+    ages.head()
+
+To select a single column, use square brackets ``[]`` with the column
+name of the column of interest.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+Each column in a :class:`DataFrame` is a :class:`Series`. As a single column is
+selected, the returned object is a pandas :class:`Series`. We can verify this
+by checking the type of the output:
+
+.. ipython:: python
+
+    type(titanic["Age"])
+
+And have a look at the ``shape`` of the output:
+
+.. ipython:: python
+
+    titanic["Age"].shape
+
+:attr:`DataFrame.shape` is an attribute (remember the :ref:`tutorial on reading and writing <10min_tut_02_read_write>`: do not use parentheses for attributes) of a
+pandas ``Series`` and ``DataFrame`` containing the number of rows and
+columns: *(nrows, ncolumns)*. A pandas Series is 1-dimensional and only
+the number of rows is returned.
+
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
    +
  • + +I’m interested in the age and sex of the titanic passengers. + +.. ipython:: python + + age_sex = titanic[["Age", "Sex"]] + age_sex.head() + +To select multiple columns, use a list of column names within the +selection brackets ``[]``. + +.. raw:: html + +
  • +
+ +.. note:: + The inner square brackets define a + :ref:`Python list ` with column names, whereas + the outer brackets are used to select the data from a pandas + ``DataFrame`` as seen in the previous example. + +The returned data type is a pandas DataFrame: + +.. ipython:: python + + type(titanic[["Age", "Sex"]]) + +.. ipython:: python + + titanic[["Age", "Sex"]].shape + +The selection returned a ``DataFrame`` with 891 rows and 2 columns. Remember, a +``DataFrame`` is 2-dimensional with both a row and column dimension. + +.. raw:: html + +
+ To user guide + +For basic information on indexing, see the user guide section on :ref:`indexing and selecting data `. + +.. raw:: html + +
+ +How do I filter specific rows from a ``DataFrame``? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/03_subset_rows.svg + :align: center + +.. raw:: html + +
    +
+
+I’m interested in the passengers older than 35 years.
+
+.. ipython:: python
+
+    above_35 = titanic[titanic["Age"] > 35]
+    above_35.head()
+
+To select rows based on a conditional expression, use a condition inside
+the selection brackets ``[]``.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+The condition inside the selection
+brackets ``titanic["Age"] > 35`` checks for which rows the ``Age``
+column has a value larger than 35:
+
+.. ipython:: python
+
+    titanic["Age"] > 35
+
+The output of the conditional expression (``>``, but also ``==``,
+``!=``, ``<``, ``<=``, … would work) is actually a pandas ``Series`` of
+boolean values (either ``True`` or ``False``) with the same number of
+rows as the original ``DataFrame``. Such a ``Series`` of boolean values
+can be used to filter the ``DataFrame`` by putting it in between the
+selection brackets ``[]``. Only rows for which the value is ``True``
+will be selected.
+
+We know from before that the original titanic ``DataFrame`` consists of
+891 rows. Let’s have a look at the number of rows which satisfy the
+condition by checking the ``shape`` attribute of the resulting
+``DataFrame`` ``above_35``:
+
+.. ipython:: python
+
+    above_35.shape
+
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
    +
+
+I’m interested in the titanic passengers from cabin class 2 and 3.
+
+.. ipython:: python
+
+    class_23 = titanic[titanic["Pclass"].isin([2, 3])]
+    class_23.head()
+
+Similar to the conditional expression, the :func:`~Series.isin` conditional function
+returns ``True`` for each row where the value is in the provided list. To
+filter the rows based on such a function, use the conditional function
+inside the selection brackets ``[]``. In this case, the condition inside
+the selection brackets ``titanic["Pclass"].isin([2, 3])`` checks for
+which rows the ``Pclass`` column is either 2 or 3.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+The above is equivalent to filtering by rows for which the class is
+either 2 or 3 and combining the two statements with an ``|`` (or)
+operator:
+
+.. ipython:: python
+
+    class_23 = titanic[(titanic["Pclass"] == 2) | (titanic["Pclass"] == 3)]
+    class_23.head()
+
+.. note::
+    When combining multiple conditional statements, each condition
+    must be surrounded by parentheses ``()``. Moreover, you cannot use
+    ``or``/``and`` but need to use the ``or`` operator ``|`` and the ``and``
+    operator ``&``.
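+
+As a minimal illustration of the ``&`` (and) operator (the combination of
+conditions chosen here is arbitrary), conditions combine in the same way:
+
+.. ipython:: python
+
+    titanic[(titanic["Age"] > 35) & (titanic["Pclass"] == 3)].head()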
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "To user guide" link button -->
+
+See the dedicated section in the user guide about :ref:`boolean indexing ` or about the :ref:`isin function `.
+
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
+
+I want to work with passenger data for which the age is known.
+
+.. ipython:: python
+
+    age_no_na = titanic[titanic["Age"].notna()]
+    age_no_na.head()
+
+The :meth:`~Series.notna` conditional function returns ``True`` for each row where
+the value is not a null value. As such, this can be combined with the
+selection brackets ``[]`` to filter the data table.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+You might wonder what actually changed, as the first 5 lines are still
+the same values. One way to verify is to check if the shape has changed:
+
+.. ipython:: python
+
+    age_no_na.shape
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+ To user guide + +For more dedicated functions on missing values, see the user guide section about :ref:`handling missing data `. + +.. raw:: html + +
+ +How do I select specific rows and columns from a ``DataFrame``? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/03_subset_columns_rows.svg + :align: center + +.. raw:: html + +
    +
+
+I’m interested in the names of the passengers older than 35 years.
+
+.. ipython:: python
+
+    adult_names = titanic.loc[titanic["Age"] > 35, "Name"]
+    adult_names.head()
+
+In this case, a subset of both rows and columns is made in one go and
+just using selection brackets ``[]`` is not sufficient anymore. The
+``loc``/``iloc`` operators are required in front of the selection
+brackets ``[]``. When using ``loc``/``iloc``, the part before the comma
+is the rows you want, and the part after the comma is the columns you
+want to select.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+When using the column names, row labels or a conditional expression, use
+the ``loc`` operator in front of the selection brackets ``[]``. For both
+the part before and after the comma, you can use a single label, a list
+of labels, a slice of labels, a conditional expression or a colon. Using
+a colon specifies you want to select all rows or columns.
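+
+For example (an illustrative sketch), a list of labels after the comma
+selects multiple columns for the filtered rows:
+
+.. ipython:: python
+
+    titanic.loc[titanic["Age"] > 35, ["Name", "Sex"]].head()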
    +
+
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
+
+I’m interested in rows 10 to 25 and columns 3 to 5.
+
+.. ipython:: python
+
+    titanic.iloc[9:25, 2:5]
+
+Again, a subset of both rows and columns is made in one go and just
+using selection brackets ``[]`` is not sufficient anymore. When
+specifically interested in certain rows and/or columns based on their
+position in the table, use the ``iloc`` operator in front of the
+selection brackets ``[]``.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+When selecting specific rows and/or columns with ``loc`` or ``iloc``,
+new values can be assigned to the selected data. For example, to assign
+the name ``anonymous`` to the first 3 elements of the third column:
+
+.. ipython:: python
+
+    titanic.iloc[0:3, 3] = "anonymous"
+    titanic.head()
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+ To user guide + +See the user guide section on :ref:`different choices for indexing ` to get more insight in the usage of ``loc`` and ``iloc``. + +.. raw:: html + +
+ +.. raw:: html + +
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "REMEMBER" callout header -->
+
+- When selecting subsets of data, square brackets ``[]`` are used.
+- Inside these brackets, you can use a single column/row label, a list
+  of column/row labels, a slice of labels, a conditional expression or
+  a colon.
+- Select specific rows and/or columns using ``loc`` when using the row
+  and column names.
+- Select specific rows and/or columns using ``iloc`` when using the
+  positions in the table.
+- You can assign new values to a selection based on ``loc``/``iloc``.
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "To user guide" link button -->
+
+A full overview about indexing is provided in the user guide pages on :ref:`indexing and selecting data `.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
diff --git a/doc/source/getting_started/intro_tutorials/04_plotting.rst b/doc/source/getting_started/intro_tutorials/04_plotting.rst
new file mode 100644
index 0000000000000..f3d99ee56359a
--- /dev/null
+++ b/doc/source/getting_started/intro_tutorials/04_plotting.rst
@@ -0,0 +1,252 @@
+.. _10min_tut_04_plotting:
+
+{{ header }}
+
+.. ipython:: python
+
+    import pandas as pd
+    import matplotlib.pyplot as plt
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "Data used for this tutorial" expandable card -->
+
+For this tutorial, air quality data about :math:`NO_2` is used, made
+available by `openaq `__ and using the
+`py-openaq `__ package.
+The ``air_quality_no2.csv`` data set provides :math:`NO_2` values for
+the measurement stations *FR04014*, *BETR801* and *London Westminster*
+in Paris, Antwerp and London, respectively.
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "To raw data" link button -->
+
+.. ipython:: python
+
+    air_quality = pd.read_csv("data/air_quality_no2.csv",
+                              index_col=0, parse_dates=True)
+    air_quality.head()
+
+.. note::
+    The ``index_col`` and ``parse_dates`` parameters of the ``read_csv`` function
+    are used to define the first (0th) column as the index of the resulting
+    ``DataFrame`` and to convert the dates in the column to :class:`Timestamp`
+    objects, respectively.
+
+.. raw:: html
+
+        <!-- raw HTML omitted: close card -->
+
+How to create plots in pandas?
+------------------------------
+
+.. image:: ../../_static/schemas/04_plot_overview.svg
+   :align: center
+
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
    +
  • + +I want a quick visual check of the data. + +.. ipython:: python + + @savefig 04_airqual_quick.png + air_quality.plot() + +With a ``DataFrame``, pandas creates by default one line plot for each of +the columns with numeric data. + +.. raw:: html + +
  • +
+ +.. raw:: html + +
    +
  • + +I want to plot only the columns of the data table with the data from Paris. + +.. ipython:: python + + @savefig 04_airqual_paris.png + air_quality["station_paris"].plot() + +To plot a specific column, use the selection method of the +:ref:`subset data tutorial <10min_tut_03_subset>` in combination with the :meth:`~DataFrame.plot` +method. Hence, the :meth:`~DataFrame.plot` method works on both ``Series`` and +``DataFrame``. + +.. raw:: html + +
  • +
+ +.. raw:: html + +
    +
+
+I want to visually compare the :math:`NO_2` values measured in London versus Paris.
+
+.. ipython:: python
+
+    @savefig 04_airqual_scatter.png
+    air_quality.plot.scatter(x="station_london",
+                             y="station_paris",
+                             alpha=0.5)
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+Apart from the default ``line`` plot when using the ``plot`` function, a
+number of alternatives are available to plot data. Let’s use some
+standard Python to get an overview of the available plot methods:
+
+.. ipython:: python
+
+    [method_name for method_name in dir(air_quality.plot)
+     if not method_name.startswith("_")]
+
+.. note::
+    In many development environments as well as IPython and
+    Jupyter Notebook, use the TAB button to get an overview of the available
+    methods, for example ``air_quality.plot.`` + TAB.
+
+One of the options is :meth:`DataFrame.plot.box`, which refers to a
+`boxplot `__. The ``box``
+method is applicable on the air quality example data:
+
+.. ipython:: python
+
+    @savefig 04_airqual_boxplot.png
+    air_quality.plot.box()
+
+.. raw:: html
+
+ To user guide + +For an introduction to plots other than the default line plot, see the user guide section about :ref:`supported plot styles `. + +.. raw:: html + +
+ +.. raw:: html + +
    +
+
+I want each of the columns in a separate subplot.
+
+.. ipython:: python
+
+    @savefig 04_airqual_area_subplot.png
+    axs = air_quality.plot.area(figsize=(12, 4), subplots=True)
+
+Separate subplots for each of the data columns are supported by the ``subplots`` argument
+of the ``plot`` functions. The built-in options available in each of the pandas plot
+functions are worth exploring.
+
+.. raw:: html
+
  • +
+ +.. raw:: html + +
+ To user guide + +Some more formatting options are explained in the user guide section on :ref:`plot formatting `. + +.. raw:: html + +
+ +.. raw:: html + +
    +
  • + +I want to further customize, extend or save the resulting plot. + +.. ipython:: python + + fig, axs = plt.subplots(figsize=(12, 4)); + air_quality.plot.area(ax=axs); + @savefig 04_airqual_customized.png + axs.set_ylabel("NO$_2$ concentration"); + fig.savefig("no2_concentrations.png") + +.. ipython:: python + :suppress: + + import os + os.remove('no2_concentrations.png') + +.. raw:: html + +
  • +
+
+Each of the plot objects created by pandas is a
+`matplotlib `__ object. As Matplotlib provides
+plenty of options to customize plots, making the link between pandas and
+Matplotlib explicit brings all the power of Matplotlib to the plot.
+This strategy is applied in the previous example:
+
+::
+
+    fig, axs = plt.subplots(figsize=(12, 4))  # Create an empty matplotlib Figure and Axes
+    air_quality.plot.area(ax=axs)             # Use pandas to put the area plot on the prepared Figure/Axes
+    axs.set_ylabel("NO$_2$ concentration")    # Do any matplotlib customization you like
+    fig.savefig("no2_concentrations.png")     # Save the Figure/Axes using the existing matplotlib method.
+

REMEMBER

+ +- The ``.plot.*`` methods are applicable on both Series and DataFrames +- By default, each of the columns is plotted as a different element + (line, boxplot,…) +- Any plot created by pandas is a Matplotlib object. + +.. raw:: html + +
+ +.. raw:: html + +
+ To user guide + +A full overview of plotting in pandas is provided in the :ref:`visualization pages `. + +.. raw:: html + +
diff --git a/doc/source/getting_started/intro_tutorials/05_add_columns.rst b/doc/source/getting_started/intro_tutorials/05_add_columns.rst
new file mode 100644
index 0000000000000..d4f6a8d6bb4a2
--- /dev/null
+++ b/doc/source/getting_started/intro_tutorials/05_add_columns.rst
@@ -0,0 +1,186 @@
+.. _10min_tut_05_columns:
+
+{{ header }}
+
+.. ipython:: python
+
+    import pandas as pd
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "Data used for this tutorial" expandable card -->
+
+For this tutorial, air quality data about :math:`NO_2` is used, made
+available by `openaq `__ and using the
+`py-openaq `__ package.
+The ``air_quality_no2.csv`` data set provides :math:`NO_2` values for
+the measurement stations *FR04014*, *BETR801* and *London Westminster*
+in Paris, Antwerp and London, respectively.
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "To raw data" link button -->
+
+.. ipython:: python
+
+    air_quality = pd.read_csv("data/air_quality_no2.csv",
+                              index_col=0, parse_dates=True)
+    air_quality.head()
+
+.. raw:: html
+
+        <!-- raw HTML omitted: close card -->
+
+How to create new columns derived from existing columns?
+--------------------------------------------------------
+
+.. image:: ../../_static/schemas/05_newcolumn_1.svg
+   :align: center
+
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
    +
+
+I want to express the :math:`NO_2` concentration of the station in London in mg/m\ :math:`^3`
+
+(*If we assume temperature of 25 degrees Celsius and pressure of 1013
+hPa, the conversion factor is 1.882*)
+
+.. ipython:: python
+
+    air_quality["london_mg_per_cubic"] = air_quality["station_london"] * 1.882
+    air_quality.head()
+
+To create a new column, use the ``[]`` brackets with the new column name
+at the left side of the assignment.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+.. note::
+    The calculation of the values is done **element-wise**. This
+    means all values in the given column are multiplied by the value 1.882
+    at once. You do not need to use a loop to iterate over each of the rows!
+
+.. image:: ../../_static/schemas/05_newcolumn_2.svg
+   :align: center
+
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
    +
+
+I want to check the ratio of the values in Paris versus Antwerp and save the result in a new column
+
+.. ipython:: python
+
+    air_quality["ratio_paris_antwerp"] = \
+        air_quality["station_paris"] / air_quality["station_antwerp"]
+    air_quality.head()
+
+The calculation is again element-wise, so the ``/`` is applied *for the
+values in each row*.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+Other mathematical operators (``+``, ``-``, ``*``, ``/``) and
+logical operators (``<``, ``>``, ``==``, …) also work element-wise. The latter was already
+used in the :ref:`subset data tutorial <10min_tut_03_subset>` to filter
+rows of a table using a conditional expression.
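+
+As a minimal illustration (the threshold of 20 is arbitrary), a comparison
+operator applied to a column returns a boolean value for every row at once:
+
+.. ipython:: python
+
+    (air_quality["station_paris"] > 20).head()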
    +
+
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
+
+I want to rename the data columns to the corresponding station identifiers used by openAQ
+
+.. ipython:: python
+
+    air_quality_renamed = air_quality.rename(
+        columns={"station_antwerp": "BETR801",
+                 "station_paris": "FR04014",
+                 "station_london": "London Westminster"})
+
+.. ipython:: python
+
+    air_quality_renamed.head()
+
+The :meth:`~DataFrame.rename` function can be used for both row labels and column
+labels. Provide a dictionary with the keys the current names and the
+values the new names to update the corresponding names.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+The mapping should not be restricted to fixed names only, but can be a
+mapping function as well. For example, converting the column names to
+lowercase letters can be done using a function as well:
+
+.. ipython:: python
+
+    air_quality_renamed = air_quality_renamed.rename(columns=str.lower)
+    air_quality_renamed.head()
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "To user guide" link button -->
+
+Details about column or row label renaming are provided in the user guide section on :ref:`renaming labels `.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+ +.. raw:: html + +
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "REMEMBER" callout header -->
+
+- Create a new column by assigning the output to the DataFrame with a
+  new column name in between the ``[]``.
+- Operations are element-wise, no need to loop over rows.
+- Use ``rename`` with a dictionary or function to rename row labels or
+  column names.
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "To user guide" link button -->
+
+The user guide contains a separate section on :ref:`column addition and deletion `.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
diff --git a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
new file mode 100644
index 0000000000000..7a94c90525027
--- /dev/null
+++ b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
@@ -0,0 +1,310 @@
+.. _10min_tut_06_stats:
+
+{{ header }}
+
+.. ipython:: python
+
+    import pandas as pd
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "Data used for this tutorial" expandable card -->
+
+This tutorial uses the titanic data set, stored as CSV. The data
+consists of the following data columns:
+
+- PassengerId: Id of every passenger.
+- Survived: This feature has values 0 and 1: 0 for not survived and 1
+  for survived.
+- Pclass: There are 3 classes: Class 1, Class 2 and Class 3.
+- Name: Name of passenger.
+- Sex: Gender of passenger.
+- Age: Age of passenger.
+- SibSp: Indication that passenger has siblings and a spouse.
+- Parch: Whether a passenger is alone or has family.
+- Ticket: Ticket number of passenger.
+- Fare: Indicating the fare.
+- Cabin: The cabin of passenger.
+- Embarked: The embarked category.
+
+.. raw:: html
+
+        <!-- raw HTML omitted: "To raw data" link button -->
+
+.. ipython:: python
+
+    titanic = pd.read_csv("data/titanic.csv")
+    titanic.head()
+
+.. raw:: html
+
+        <!-- raw HTML omitted: close card -->
+
+How to calculate summary statistics?
+------------------------------------
+
+Aggregating statistics
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: ../../_static/schemas/06_aggregate.svg
+   :align: center
+
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
    +
  • + +What is the average age of the titanic passengers? + +.. ipython:: python + + titanic["Age"].mean() + +.. raw:: html + +
  • +
+ +Different statistics are available and can be applied to columns with +numerical data. Operations in general exclude missing data and operate +across rows by default. + +.. image:: ../../_static/schemas/06_reduction.svg + :align: center + +.. raw:: html + +
    +
+
+What is the median age and ticket fare price of the titanic passengers?
+
+.. ipython:: python
+
+    titanic[["Age", "Fare"]].median()
+
+The statistic applied to multiple columns of a ``DataFrame`` (the selection of two columns
+returns a ``DataFrame``, see the :ref:`subset data tutorial <10min_tut_03_subset>`) is calculated for each numeric column.
+
+.. raw:: html
+
  • +
+
+The aggregating statistic can be calculated for multiple columns at the
+same time. Remember the ``describe`` function from the :ref:`first tutorial <10min_tut_01_tableoriented>`?
+
+.. ipython:: python
+
+    titanic[["Age", "Fare"]].describe()
+
+Instead of the predefined statistics, specific combinations of
+aggregating statistics for given columns can be defined using the
+:func:`DataFrame.agg` method:
+
+.. ipython:: python
+
+    titanic.agg({'Age': ['min', 'max', 'median', 'skew'],
+                 'Fare': ['min', 'max', 'median', 'mean']})
+
+.. raw:: html
+
+ To user guide + +Details about descriptive statistics are provided in the user guide section on :ref:`descriptive statistics `. + +.. raw:: html + +
+ + +Aggregating statistics grouped by category +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/06_groupby.svg + :align: center + +.. raw:: html + +
    +
+
+What is the average age for male versus female titanic passengers?
+
+.. ipython:: python
+
+    titanic[["Sex", "Age"]].groupby("Sex").mean()
+
+As our interest is the average age for each gender, a subselection on
+these two columns is made first: ``titanic[["Sex", "Age"]]``. Next, the
+:meth:`~DataFrame.groupby` method is applied on the ``Sex`` column to make a group per
+category. The average age *for each gender* is calculated and
+returned.
+
+.. raw:: html
+
+        <!-- raw HTML omitted -->
+
+Calculating a given statistic (e.g. ``mean`` age) *for each category in
+a column* (e.g. male/female in the ``Sex`` column) is a common pattern.
+The ``groupby`` method is used to support this type of operations. More
+generally, this fits the ``split-apply-combine`` pattern:
+
+- **Split** the data into groups
+- **Apply** a function to each group independently
+- **Combine** the results into a data structure
+
+The apply and combine steps are typically done together in pandas.
+
+In the previous example, we explicitly selected the 2 columns first. If
+not, the ``mean`` method is applied to each column containing numerical
+data:
+
+.. ipython:: python
+
+    titanic.groupby("Sex").mean()
+
+It does not make much sense to get the average value of the ``Pclass``.
+If we are only interested in the average age for each gender, the
+selection of columns (rectangular brackets ``[]`` as usual) is supported
+on the grouped data as well:
+
+.. ipython:: python
+
+    titanic.groupby("Sex")["Age"].mean()
+
+.. image:: ../../_static/schemas/06_groupby_select_detail.svg
+   :align: center
+
+.. note::
+    The `Pclass` column contains numerical data but actually
+    represents 3 categories (or factors) with respectively the labels ‘1’,
+    ‘2’ and ‘3’. Calculating statistics on these does not make much sense.
+    Therefore, pandas provides a ``Categorical`` data type to handle this
+    type of data. More information is provided in the user guide
+    :ref:`categorical` section.
+
+.. raw:: html
+
+        <!-- raw HTML omitted: question-list card -->
    +
  • + +What is the mean ticket fare price for each of the sex and cabin class combinations? + +.. ipython:: python + + titanic.groupby(["Sex", "Pclass"])["Fare"].mean() + +Grouping can be done by multiple columns at the same time. Provide the +column names as a list to the :meth:`~DataFrame.groupby` method. + +.. raw:: html + +
  • +
+ +.. raw:: html + +
+ To user guide + +A full description on the split-apply-combine approach is provided in the user guide section on :ref:`groupby operations `. + +.. raw:: html + +
+ +Count number of records by category +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/06_valuecounts.svg + :align: center + +.. raw:: html + +
    +
  • + +What is the number of passengers in each of the cabin classes? + +.. ipython:: python + + titanic["Pclass"].value_counts() + +The :meth:`~Series.value_counts` method counts the number of records for each +category in a column. + +.. raw:: html + +
  • +
+ +The function is a shortcut, as it is actually a groupby operation in combination with counting of the number of records +within each group: + +.. ipython:: python + + titanic.groupby("Pclass")["Pclass"].count() + +.. note:: + Both ``size`` and ``count`` can be used in combination with + ``groupby``. Whereas ``size`` includes ``NaN`` values and just provides + the number of rows (size of the table), ``count`` excludes the missing + values. In the ``value_counts`` method, use the ``dropna`` argument to + include or exclude the ``NaN`` values. + +.. raw:: html + +
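+
+A short sketch contrasting the counting variants mentioned in the note
+above (illustrative only, assuming the ``titanic`` table from before):
+
+::
+
+    titanic.groupby("Pclass")["Age"].size()     # rows per class, missing ages included
+    titanic.groupby("Pclass")["Age"].count()    # only non-missing ages per class
+    titanic["Age"].value_counts(dropna=False)   # keeps NaN as a separate entry
+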
+ To user guide
+
+The user guide has a dedicated section on ``value_counts``, see the page on :ref:`discretization `.
+
+.. raw:: html
+
+ +.. raw:: html + +
+

REMEMBER

+ +- Aggregation statistics can be calculated on entire columns or rows +- ``groupby`` provides the power of the *split-apply-combine* pattern +- ``value_counts`` is a convenient shortcut to count the number of + entries in each category of a variable + +.. raw:: html + +
+ +.. raw:: html + +
+ To user guide + +A full description on the split-apply-combine approach is provided in the user guide pages about :ref:`groupby operations `. + +.. raw:: html + +
diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst new file mode 100644 index 0000000000000..b28a9012a4ad9 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst @@ -0,0 +1,404 @@ +.. _10min_tut_07_reshape: + +{{ header }} + +.. ipython:: python + + import pandas as pd + +.. raw:: html + +
+
+
+ Data used for this tutorial: +
+
+
    +
  • + +
    +
    +

+
+This tutorial uses the titanic data set, stored as CSV. The data
+consists of the following data columns:
+
+- PassengerId: Id of every passenger.
+- Survived: Whether the passenger survived; 0 for no and 1 for yes.
+- Pclass: There are 3 classes: Class 1, Class 2 and Class 3.
+- Name: Name of the passenger.
+- Sex: Gender of the passenger.
+- Age: Age of the passenger.
+- SibSp: Number of siblings and spouses of the passenger aboard.
+- Parch: Whether a passenger is alone or has family aboard.
+- Ticket: Ticket number of the passenger.
+- Fare: The fare paid.
+- Cabin: The cabin of the passenger.
+- Embarked: The port of embarkation.
+
+.. raw:: html
+

    + To raw data +
    +
    + +.. ipython:: python + + titanic = pd.read_csv("data/titanic.csv") + titanic.head() + +.. raw:: html + +
  • +
  • + +
    +
    +

    + +This tutorial uses air quality data about :math:`NO_2` and Particulate matter less than 2.5 +micrometers, made available by +`openaq `__ and using the +`py-openaq `__ package. +The ``air_quality_long.csv`` data set provides :math:`NO_2` and +:math:`PM_{25}` values for the measurement stations *FR04014*, *BETR801* +and *London Westminster* in respectively Paris, Antwerp and London. + +The air-quality data set has the following columns: + +- city: city where the sensor is used, either Paris, Antwerp or London +- country: country where the sensor is used, either FR, BE or GB +- location: the id of the sensor, either *FR04014*, *BETR801* or + *London Westminster* +- parameter: the parameter measured by the sensor, either :math:`NO_2` + or Particulate matter +- value: the measured value +- unit: the unit of the measured parameter, in this case ‘µg/m³’ + +and the index of the ``DataFrame`` is ``datetime``, the datetime of the +measurement. + +.. note:: + The air-quality data is provided in a so-called *long format* + data representation with each observation on a separate row and each + variable a separate column of the data table. The long/narrow format is + also known as the `tidy data + format `__. + +.. raw:: html + +

    + To raw data +
    +
    + +.. ipython:: python + + air_quality = pd.read_csv("data/air_quality_long.csv", + index_col="date.utc", parse_dates=True) + air_quality.head() + +.. raw:: html + +
  • +
+
+ +How to reshape the layout of tables? +------------------------------------ + +Sort table rows +~~~~~~~~~~~~~~~ + +.. raw:: html + +
    +
  • + +I want to sort the titanic data according to the age of the passengers. + +.. ipython:: python + + titanic.sort_values(by="Age").head() + +.. raw:: html + +
  • +
+ +.. raw:: html + +
    +
•
+
+I want to sort the titanic data according to the cabin class and age in descending order.
+
+.. ipython:: python
+
+    titanic.sort_values(by=['Pclass', 'Age'], ascending=False).head()
+
+With :meth:`DataFrame.sort_values`, the rows in the table are sorted according to the
+defined column(s). The index will follow the row order.
+
+.. raw:: html
+
  • +
+ +.. raw:: html + +
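+
+The sorting direction can also differ per column. As a hypothetical
+variation on the example above (``ascending`` accepts a list matching
+``by``):
+
+::
+
+    # class descending, but age ascending within each class
+    titanic.sort_values(by=["Pclass", "Age"], ascending=[False, True]).head()
+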
+ To user guide
+
+More details about sorting of tables are provided in the user guide section on :ref:`sorting data `.
+
+.. raw:: html
+
+ +Long to wide table format +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Let’s use a small subset of the air quality data set. We focus on +:math:`NO_2` data and only use the first two measurements of each +location (i.e. the head of each group). The subset of data will be +called ``no2_subset`` + +.. ipython:: python + + # filter for no2 data only + no2 = air_quality[air_quality["parameter"] == "no2"] + +.. ipython:: python + + # use 2 measurements (head) for each location (groupby) + no2_subset = no2.sort_index().groupby(["location"]).head(2) + no2_subset + +.. image:: ../../_static/schemas/07_pivot.svg + :align: center + +.. raw:: html + +
    +
•
+
+I want the values for the three stations as separate columns next to each other
+
+.. ipython:: python
+
+    no2_subset.pivot(columns="location", values="value")
+
+The :meth:`~DataFrame.pivot` function is a pure reshaping of the data: a single value
+for each index/column combination is required.
+
+.. raw:: html
+
  • +
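+
+Note that this reshaping only works when each index/column combination
+occurs once. A sketch of what would go wrong on the full table (shown as
+comments, since it is expected to fail):
+
+::
+
+    # the full air_quality table holds several parameters per
+    # timestamp/location pair, so the same index/column combination occurs
+    # more than once and pivot raises a ValueError
+    # ("Index contains duplicate entries, cannot reshape"):
+    # air_quality.pivot(columns="location", values="value")
+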
+
+As pandas supports plotting of multiple columns (see :ref:`plotting tutorial <10min_tut_04_plotting>`) out of the box, the conversion from
+*long* to *wide* table format enables the plotting of the different time
+series at the same time:
+
+.. ipython:: python
+
+    no2.head()
+
+.. ipython:: python
+
+    @savefig 7_reshape_columns.png
+    no2.pivot(columns="location", values="value").plot()
+
+.. note::
+    When the ``index`` parameter is not defined, the existing
+    index (row labels) is used.
+
+.. raw:: html
+
+ To user guide + +For more information about :meth:`~DataFrame.pivot`, see the user guide section on :ref:`pivoting DataFrame objects `. + +.. raw:: html + +
+ +Pivot table +~~~~~~~~~~~ + +.. image:: ../../_static/schemas/07_pivot_table.svg + :align: center + +.. raw:: html + +
    +
  • + +I want the mean concentrations for :math:`NO_2` and :math:`PM_{2.5}` in each of the stations in table form + +.. ipython:: python + + air_quality.pivot_table(values="value", index="location", + columns="parameter", aggfunc="mean") + +In the case of :meth:`~DataFrame.pivot`, the data is only rearranged. When multiple +values need to be aggregated (in this specific case, the values on +different time steps) :meth:`~DataFrame.pivot_table` can be used, providing an +aggregation function (e.g. mean) on how to combine these values. + +.. raw:: html + +
  • +
+
+Pivot table is a well known concept in spreadsheet software. When
+interested in summary columns for each variable separately as well, set
+the ``margins`` parameter to ``True``:
+
+.. ipython:: python
+
+    air_quality.pivot_table(values="value", index="location",
+                            columns="parameter", aggfunc="mean",
+                            margins=True)
+
.. raw:: html
+
+ To user guide + +For more information about :meth:`~DataFrame.pivot_table`, see the user guide section on :ref:`pivot tables `. + +.. raw:: html + +
+
+.. note::
+    In case you are wondering, :meth:`~DataFrame.pivot_table` is indeed directly linked
+    to :meth:`~DataFrame.groupby`. The same result can be derived by grouping on both
+    ``parameter`` and ``location``:
+
+    ::
+
+        air_quality.groupby(["parameter", "location"]).mean()
+
+.. raw:: html
+
+ To user guide + +Have a look at :meth:`~DataFrame.groupby` in combination with :meth:`~DataFrame.unstack` at the user guide section on :ref:`combining stats and groupby `. + +.. raw:: html + +
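+
+For illustration, a sketch of that groupby route, additionally reshaped
+into the same wide layout with ``unstack`` (a variation on the note above,
+restricted to the ``value`` column):
+
+::
+
+    # group, aggregate, then move "parameter" from the row index to the columns
+    air_quality.groupby(["location", "parameter"])["value"].mean().unstack()
+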
+ +Wide to long format +~~~~~~~~~~~~~~~~~~~ + +Starting again from the wide format table created in the previous +section: + +.. ipython:: python + + no2_pivoted = no2.pivot(columns="location", values="value").reset_index() + no2_pivoted.head() + +.. image:: ../../_static/schemas/07_melt.svg + :align: center + +.. raw:: html + +
    +
  • + +I want to collect all air quality :math:`NO_2` measurements in a single column (long format) + +.. ipython:: python + + no_2 = no2_pivoted.melt(id_vars="date.utc") + no_2.head() + +The :func:`pandas.melt` method on a ``DataFrame`` converts the data table from wide +format to long format. The column headers become the variable names in a +newly created column. + +.. raw:: html + +
  • +
+
+The solution is the short version on how to apply :func:`pandas.melt`. The method
+will *melt* all columns NOT mentioned in ``id_vars`` together into two
+columns: a column with the column header names and a column with the
+values themselves. The latter column gets by default the name ``value``.
+
+The :func:`pandas.melt` method can be specified in more detail:
+
+.. ipython:: python
+
+    no_2 = no2_pivoted.melt(id_vars="date.utc",
+                            value_vars=["BETR801",
+                                        "FR04014",
+                                        "London Westminster"],
+                            value_name="NO_2",
+                            var_name="id_location")
+    no_2.head()
+
+The result is the same, but defined in more detail:
+
+- ``value_vars`` defines explicitly which columns to *melt* together
+- ``value_name`` provides a custom column name for the values column
+  instead of the default column name ``value``
+- ``var_name`` provides a custom column name for the columns collecting
+  the column header names. Otherwise it takes the index name or a
+  default ``variable``
+
+Hence, the arguments ``value_name`` and ``var_name`` are just
+user-defined names for the two generated columns. The columns to melt
+are defined by ``id_vars`` and ``value_vars``.
+
+.. raw:: html
+
+ To user guide + +Conversion from wide to long format with :func:`pandas.melt` is explained in the user guide section on :ref:`reshaping by melt `. + +.. raw:: html + +
+ +.. raw:: html + +
+

REMEMBER

+
+- Sorting by one or more columns is supported by ``sort_values``
+- The ``pivot`` function is purely a restructuring of the data,
+  ``pivot_table`` supports aggregations
+- The reverse of ``pivot`` (long to wide format) is ``melt`` (wide to
+  long format)
+
+.. raw:: html
+
+ +.. raw:: html + +
+ To user guide + +A full overview is available in the user guide on the pages about :ref:`reshaping and pivoting `. + +.. raw:: html + +
diff --git a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst new file mode 100644 index 0000000000000..f317e7a1f91b4 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst @@ -0,0 +1,326 @@ +.. _10min_tut_08_combine: + +{{ header }} + +.. ipython:: python + + import pandas as pd + +.. raw:: html + +
+
+
+ Data used for this tutorial: +
+
+
    +
  • + +
    +
    +

    + +For this tutorial, air quality data about :math:`NO_2` is used, made available by +`openaq `__ and downloaded using the +`py-openaq `__ package. + +The ``air_quality_no2_long.csv`` data set provides :math:`NO_2` +values for the measurement stations *FR04014*, *BETR801* and *London +Westminster* in respectively Paris, Antwerp and London. + +.. raw:: html + +

    + To raw data +
    +
    + +.. ipython:: python + + air_quality_no2 = pd.read_csv("data/air_quality_no2_long.csv", + parse_dates=True) + air_quality_no2 = air_quality_no2[["date.utc", "location", + "parameter", "value"]] + air_quality_no2.head() + +.. raw:: html + +
  • +
  • + +
    +
    +

    + +For this tutorial, air quality data about Particulate +matter less than 2.5 micrometers is used, made available by +`openaq `__ and downloaded using the +`py-openaq `__ package. + +The ``air_quality_pm25_long.csv`` data set provides :math:`PM_{25}` +values for the measurement stations *FR04014*, *BETR801* and *London +Westminster* in respectively Paris, Antwerp and London. + +.. raw:: html + +

    + To raw data +
    +
    + +.. ipython:: python + + air_quality_pm25 = pd.read_csv("data/air_quality_pm25_long.csv", + parse_dates=True) + air_quality_pm25 = air_quality_pm25[["date.utc", "location", + "parameter", "value"]] + air_quality_pm25.head() + +.. raw:: html + +
  • +
+
+ + +How to combine data from multiple tables? +----------------------------------------- + +Concatenating objects +~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/08_concat_row.svg + :align: center + +.. raw:: html + +
    +
•
+
+I want to combine the measurements of :math:`NO_2` and :math:`PM_{25}`, two tables with a similar structure, in a single table
+
+.. ipython:: python
+
+    air_quality = pd.concat([air_quality_pm25, air_quality_no2], axis=0)
+    air_quality.head()
+
+The :func:`~pandas.concat` function performs concatenation operations of multiple
+tables along one of the axes (row-wise or column-wise).
+
+.. raw:: html
+
  • +
+
+By default concatenation is along axis 0, so the resulting table combines the rows
+of the input tables. Let’s check the shape of the original and the
+concatenated tables to verify the operation:
+
+.. ipython:: python
+
+    print('Shape of the `air_quality_pm25` table: ', air_quality_pm25.shape)
+    print('Shape of the `air_quality_no2` table: ', air_quality_no2.shape)
+    print('Shape of the resulting `air_quality` table: ', air_quality.shape)
+
+Hence, the resulting table has 3178 = 1110 + 2068 rows.
+
+.. note::
+    The **axis** argument recurs in a number of pandas
+    methods that can be applied **along an axis**. A ``DataFrame`` has two
+    corresponding axes: the first running vertically downwards across rows
+    (axis 0), and the second running horizontally across columns (axis 1).
+    Most operations like concatenation or summary statistics are by default
+    across rows (axis 0), but can be applied across columns as well.
+
+Sorting the table on the datetime information also illustrates the
+combination of both tables, with the ``parameter`` column defining the
+origin of the table (either ``no2`` from table ``air_quality_no2`` or
+``pm25`` from table ``air_quality_pm25``):
+
+.. ipython:: python
+
+    air_quality = air_quality.sort_values("date.utc")
+    air_quality.head()
+
+In this specific example, the ``parameter`` column provided by the data
+ensures that each of the original tables can be identified. This is not
+always the case. The ``concat`` function provides a convenient solution
+with the ``keys`` argument, adding an additional (hierarchical) row
+index. For example:
+
+.. ipython:: python
+
+    air_quality_ = pd.concat([air_quality_pm25, air_quality_no2],
+                             keys=["PM25", "NO2"])
+
+.. ipython:: python
+
+    air_quality_.head()
+
+.. note::
+    The existence of multiple row/column indices at the same time
+    has not been mentioned within these tutorials. *Hierarchical indexing*
+    or *MultiIndex* is an advanced and powerful pandas feature to analyze
+    higher dimensional data.
+
+    Multi-indexing is out of scope for this pandas introduction. For the
+    moment, remember that the function ``reset_index`` can be used to
+    convert any level of an index to a column, e.g.
+    ``air_quality.reset_index(level=0)``
+
+    .. raw:: html
+
+ To user guide + + Feel free to dive into the world of multi-indexing at the user guide section on :ref:`advanced indexing `. + + .. raw:: html + +
+ +.. raw:: html + +
+ To user guide + +More options on table concatenation (row and column +wise) and how ``concat`` can be used to define the logic (union or +intersection) of the indexes on the other axes is provided at the section on +:ref:`object concatenation `. + +.. raw:: html + +
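+
+Column-wise concatenation follows the same pattern with ``axis=1``. A
+minimal self-contained sketch with two toy tables (``left`` and ``right``
+are made-up names, with pandas imported as ``pd`` as above):
+
+::
+
+    left = pd.DataFrame({"a": [1, 2]}, index=["x", "y"])
+    right = pd.DataFrame({"b": [3, 4]}, index=["x", "y"])
+
+    # rows are aligned on the index; the columns are placed side by side
+    pd.concat([left, right], axis=1)
+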
+ +Join tables using a common identifier +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/08_merge_left.svg + :align: center + +.. raw:: html + +
    +
•
+
+Add the station coordinates, provided by the stations metadata table, to the corresponding rows in the measurements table.
+
+.. warning::
+    The air quality measurement station coordinates are stored in a data
+    file ``air_quality_stations.csv``, downloaded using the
+    `py-openaq `__ package.
+
+.. ipython:: python
+
+    stations_coord = pd.read_csv("data/air_quality_stations.csv")
+    stations_coord.head()
+
+.. note::
+    The stations used in this example (FR04014, BETR801 and London
+    Westminster) are just three entries listed in the metadata table. We
+    only want to add the coordinates of these three to the measurements
+    table, each on the corresponding rows of the ``air_quality`` table.
+
+.. ipython:: python
+
+    air_quality.head()
+
+.. ipython:: python
+
+    air_quality = pd.merge(air_quality, stations_coord,
+                           how='left', on='location')
+    air_quality.head()
+
+Using the :func:`~pandas.merge` function, for each of the rows in the
+``air_quality`` table, the corresponding coordinates are added from the
+``air_quality_stations_coord`` table. Both tables have the column
+``location`` in common which is used as a key to combine the
+information. By choosing the ``left`` join, only the locations available
+in the ``air_quality`` (left) table, i.e. FR04014, BETR801 and London
+Westminster, end up in the resulting table. The ``merge`` function
+supports multiple join options similar to database-style operations.
+
+.. raw:: html
+
  • +
+ +.. raw:: html + +
    +
•
+
+Add the parameter full description and name, provided by the parameters metadata table, to the measurements table
+
+.. warning::
+    The air quality parameters metadata are stored in a data file
+    ``air_quality_parameters.csv``, downloaded using the
+    `py-openaq `__ package.
+
+.. ipython:: python
+
+    air_quality_parameters = pd.read_csv("data/air_quality_parameters.csv")
+    air_quality_parameters.head()
+
+.. ipython:: python
+
+    air_quality = pd.merge(air_quality, air_quality_parameters,
+                           how='left', left_on='parameter', right_on='id')
+    air_quality.head()
+
+Compared to the previous example, there is no common column name.
+However, the ``parameter`` column in the ``air_quality`` table and the
+``id`` column in the ``air_quality_parameters`` table both provide the
+measured variable in a common format. The ``left_on`` and ``right_on``
+arguments are used here (instead of just ``on``) to make the link
+between the two tables.
  • +
+ +.. raw:: html + +
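+
+Because ``left_on``/``right_on`` keeps both key columns in the result, one
+of the two can be dropped afterwards; a small sketch following the example
+above:
+
+::
+
+    # "parameter" and "id" now carry the same key information; keep only one
+    air_quality = air_quality.drop(columns="id")
+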
+ To user guide
+
+pandas also supports inner, outer, and right joins.
+More information on join/merge of tables is provided in the user guide section on
+:ref:`database style merging of tables `. Or have a look at the
+:ref:`comparison with SQL` page.
+
+.. raw:: html
+
+ +.. raw:: html + +
+

REMEMBER

+
+- Multiple tables can be concatenated both column-wise and row-wise using
+  the ``concat`` function.
+- For database-like merging/joining of tables, use the ``merge``
+  function.
+
+.. raw:: html
+
+ +.. raw:: html + +
+ To user guide + +See the user guide for a full description of the various :ref:`facilities to combine data tables `. + +.. raw:: html + +
diff --git a/doc/source/getting_started/intro_tutorials/09_timeseries.rst b/doc/source/getting_started/intro_tutorials/09_timeseries.rst new file mode 100644 index 0000000000000..d5b4b316130bb --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/09_timeseries.rst @@ -0,0 +1,389 @@ +.. _10min_tut_09_timeseries: + +{{ header }} + +.. ipython:: python + + import pandas as pd + import matplotlib.pyplot as plt + +.. raw:: html + +
+
+
+ Data used for this tutorial: +
+
+
    +
  • + +
    +
    +

+
+For this tutorial, air quality data about :math:`NO_2` and Particulate
+matter less than 2.5 micrometers is used, made available by
+`openaq `__ and downloaded using the
+`py-openaq `__ package.
+The ``air_quality_no2_long.csv`` data set provides :math:`NO_2` values
+for the measurement stations *FR04014*, *BETR801* and *London
+Westminster* in respectively Paris, Antwerp and London.
+
+.. raw:: html
+

    + To raw data +
    +
    + +.. ipython:: python + + air_quality = pd.read_csv("data/air_quality_no2_long.csv") + air_quality = air_quality.rename(columns={"date.utc": "datetime"}) + air_quality.head() + +.. ipython:: python + + air_quality.city.unique() + +.. raw:: html + +
  • +
+
+ +How to handle time series data with ease? +----------------------------------------- + +Using pandas datetime properties +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. raw:: html + +
    +
•
+
+I want to work with the dates in the column ``datetime`` as datetime objects instead of plain text
+
+.. ipython:: python
+
+    air_quality["datetime"] = pd.to_datetime(air_quality["datetime"])
+    air_quality["datetime"]
+
+Initially, the values in ``datetime`` are character strings and do not
+provide any datetime operations (e.g. extract the year, day of the
+week,…). By applying the ``to_datetime`` function, pandas interprets the
+strings and converts these to datetime (i.e. ``datetime64[ns, UTC]``)
+objects. In pandas, we call these datetime objects, which are similar to
+``datetime.datetime`` from the standard library, :class:`pandas.Timestamp` objects.
+
+.. raw:: html
+
  • +
+
+.. note::
+    As many data sets do contain datetime information in one of
+    the columns, pandas input functions like :func:`pandas.read_csv` and :func:`pandas.read_json`
+    can do the transformation to dates when reading the data using the
+    ``parse_dates`` parameter with a list of the columns to read as
+    Timestamp:
+
+    ::
+
+        pd.read_csv("../data/air_quality_no2_long.csv", parse_dates=["datetime"])
+
+Why are these :class:`pandas.Timestamp` objects useful? Let’s illustrate the added
+value with some example cases.
+
+    What is the start and end date of the time series data set we are
+    working with?
+
+.. ipython:: python
+
+    air_quality["datetime"].min(), air_quality["datetime"].max()
+
+Using :class:`pandas.Timestamp` for datetimes enables us to calculate with date
+information and make them comparable. Hence, we can use this to get the
+length of our time series:
+
+.. ipython:: python
+
+    air_quality["datetime"].max() - air_quality["datetime"].min()
+
+The result is a :class:`pandas.Timedelta` object, similar to ``datetime.timedelta``
+from the standard Python library and defining a time duration.
+ To user guide + +The different time concepts supported by pandas are explained in the user guide section on :ref:`time related concepts `. + +.. raw:: html + +
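+
+Like ``datetime.timedelta``, a :class:`pandas.Timedelta` exposes its
+components. A quick sketch building on the example above:
+
+::
+
+    # length of the measured period expressed in whole days
+    (air_quality["datetime"].max() - air_quality["datetime"].min()).days
+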
+ +.. raw:: html + +
    +
  • + +I want to add a new column to the ``DataFrame`` containing only the month of the measurement + +.. ipython:: python + + air_quality["month"] = air_quality["datetime"].dt.month + air_quality.head() + +By using ``Timestamp`` objects for dates, a lot of time-related +properties are provided by pandas. For example the ``month``, but also +``year``, ``weekofyear``, ``quarter``,… All of these properties are +accessible by the ``dt`` accessor. + +.. raw:: html + +
  • +
+ +.. raw:: html + +
+ To user guide
+
+An overview of the existing date properties is given in the
+:ref:`time and date components overview table `. More details about the ``dt`` accessor
+to return datetime-like properties are explained in a dedicated section on the :ref:`dt accessor `.
+
+.. raw:: html
+
+ +.. raw:: html + +
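+
+A couple of other ``dt`` properties, shown as a short sketch on the same
+column:
+
+::
+
+    air_quality["datetime"].dt.year      # calendar year of each measurement
+    air_quality["datetime"].dt.weekday   # day of the week, Monday=0 ... Sunday=6
+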
    +
  • + +What is the average :math:`NO_2` concentration for each day of the week for each of the measurement locations? + +.. ipython:: python + + air_quality.groupby( + [air_quality["datetime"].dt.weekday, "location"])["value"].mean() + +Remember the split-apply-combine pattern provided by ``groupby`` from the +:ref:`tutorial on statistics calculation <10min_tut_06_stats>`? +Here, we want to calculate a given statistic (e.g. mean :math:`NO_2`) +**for each weekday** and **for each measurement location**. To group on +weekdays, we use the datetime property ``weekday`` (with Monday=0 and +Sunday=6) of pandas ``Timestamp``, which is also accessible by the +``dt`` accessor. The grouping on both locations and weekdays can be done +to split the calculation of the mean on each of these combinations. + +.. danger:: + As we are working with a very short time series in these + examples, the analysis does not provide a long-term representative + result! + +.. raw:: html + +
  • +
+ +.. raw:: html + +
    +
•
+
+Plot the typical :math:`NO_2` pattern during the day of our time series of all stations together. In other words, what is the average value for each hour of the day?
+
+.. ipython:: python
+
+    fig, axs = plt.subplots(figsize=(12, 4))
+    air_quality.groupby(
+        air_quality["datetime"].dt.hour)["value"].mean().plot(kind='bar',
+                                                              rot=0,
+                                                              ax=axs)
+    plt.xlabel("Hour of the day");  # custom x label using matplotlib
+    @savefig 09_bar_chart.png
+    plt.ylabel("$NO_2 (µg/m^3)$");
+
+Similar to the previous case, we want to calculate a given statistic
+(e.g. mean :math:`NO_2`) **for each hour of the day** and we can use the
+split-apply-combine approach again. For this case, we use the datetime property ``hour``
+of the pandas ``Timestamp``, which is also accessible by the ``dt`` accessor.
  • +
+
+Datetime as index
+~~~~~~~~~~~~~~~~~
+
+In the :ref:`tutorial on reshaping <10min_tut_07_reshape>`,
+:meth:`~pandas.pivot` was introduced to reshape the data table with each of the
+measurement locations as a separate column:
+
+.. ipython:: python
+
+    no_2 = air_quality.pivot(index="datetime", columns="location", values="value")
+    no_2.head()
+
+.. note::
+    By pivoting the data, the datetime information became the
+    index of the table. In general, setting a column as an index can be
+    achieved by the ``set_index`` function.
+
+Working with a datetime index (i.e. ``DatetimeIndex``) provides powerful
+functionalities. For example, we do not need the ``dt`` accessor to get
+the time series properties, but have these properties available on the
+index directly:
+
+.. ipython:: python
+
+    no_2.index.year, no_2.index.weekday
+
+Some other advantages are the convenient subsetting of time periods or
+the adapted time scale on plots. Let’s apply this on our data.
    +
•
+
+Create a plot of the :math:`NO_2` values in the different stations from the 20th of May till the end of the 21st of May
+
+.. ipython:: python
+    :okwarning:
+
+    @savefig 09_time_section.png
+    no_2["2019-05-20":"2019-05-21"].plot();
+
+By providing a **string that parses to a datetime**, a specific subset of the data can be selected on a ``DatetimeIndex``.
+
+.. raw:: html
+
  • +
+ +.. raw:: html + +
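+
+Such string slices do not need to spell out full timestamps. A sketch
+selecting a whole month at once via ``loc`` (partial string indexing):
+
+::
+
+    # all rows of May 2019; the string is interpreted as the whole month
+    no_2.loc["2019-05"]
+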
+ To user guide + +More information on the ``DatetimeIndex`` and the slicing by using strings is provided in the section on :ref:`time series indexing `. + +.. raw:: html + +
+ +Resample a time series to another frequency +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. raw:: html + +
    +
•
+
+Aggregate the current hourly time series values to the monthly maximum value in each of the stations.
+
+.. ipython:: python
+
+    monthly_max = no_2.resample("M").max()
+    monthly_max
+
+A very powerful method on time series data with a datetime index is the
+ability to :meth:`~Series.resample` time series to another frequency (e.g.,
+converting secondly data into 5-minutely data).
+
+.. raw:: html
+
  • +
+ +The :meth:`~Series.resample` method is similar to a groupby operation: + +- it provides a time-based grouping, by using a string (e.g. ``M``, + ``5H``,…) that defines the target frequency +- it requires an aggregation function such as ``mean``, ``max``,… + +.. raw:: html + +
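+
+As a sketch with another target frequency, the same data aggregated to
+weekly means:
+
+::
+
+    # "W" resamples to weekly bins, combined here with the mean as aggregation
+    no_2.resample("W").mean().head()
+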
+ To user guide + +An overview of the aliases used to define time series frequencies is given in the :ref:`offset aliases overview table `. + +.. raw:: html + +
+ +When defined, the frequency of the time series is provided by the +``freq`` attribute: + +.. ipython:: python + + monthly_max.index.freq + +.. raw:: html + +
    +
  • + +Make a plot of the daily median :math:`NO_2` value in each of the stations. + +.. ipython:: python + :okwarning: + + @savefig 09_resample_mean.png + no_2.resample("D").mean().plot(style="-o", figsize=(10, 5)); + +.. raw:: html + +
  • +
+ +.. raw:: html + +
+ To user guide
+
+More details on the power of time series ``resampling`` are provided in the user guide section on :ref:`resampling `.
+
+.. raw:: html
+
+ +.. raw:: html + +
+

REMEMBER

+
+- Valid date strings can be converted to datetime objects using the
+  ``to_datetime`` function or as part of read functions.
+- Datetime objects in pandas support calculations, logical operations
+  and convenient date-related properties using the ``dt`` accessor.
+- A ``DatetimeIndex`` contains these date-related properties and
+  supports convenient slicing.
+- ``Resample`` is a powerful method to change the frequency of a time
+  series.
+
+.. raw:: html
+
+ +.. raw:: html + +
+ To user guide + +A full overview on time series is given in the pages on :ref:`time series and date functionality `. + +.. raw:: html + +
\ No newline at end of file diff --git a/doc/source/getting_started/intro_tutorials/10_text_data.rst b/doc/source/getting_started/intro_tutorials/10_text_data.rst new file mode 100644 index 0000000000000..3ff64875d807b --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/10_text_data.rst @@ -0,0 +1,278 @@ +.. _10min_tut_10_text: + +{{ header }} + +.. ipython:: python + + import pandas as pd + +.. raw:: html + +
+
+
+ Data used for this tutorial: +
+
+
    +
  • + +
    +
    +

+
+This tutorial uses the titanic data set, stored as CSV. The data
+consists of the following data columns:
+
+- PassengerId: Id of every passenger.
+- Survived: Whether the passenger survived; 0 for no and 1 for yes.
+- Pclass: There are 3 classes: Class 1, Class 2 and Class 3.
+- Name: Name of the passenger.
+- Sex: Gender of the passenger.
+- Age: Age of the passenger.
+- SibSp: Number of siblings and spouses of the passenger aboard.
+- Parch: Whether a passenger is alone or has family aboard.
+- Ticket: Ticket number of the passenger.
+- Fare: The fare paid.
+- Cabin: The cabin of the passenger.
+- Embarked: The port of embarkation.
+
+.. raw:: html
+

    + To raw data +
    +
    + +.. ipython:: python + + titanic = pd.read_csv("data/titanic.csv") + titanic.head() + +.. raw:: html + +
  • +
+
+ +How to manipulate textual data? +------------------------------- + +.. raw:: html + +
    +
  • + +Make all name characters lowercase + +.. ipython:: python + + titanic["Name"].str.lower() + +To make each of the strings in the ``Name`` column lowercase, select the ``Name`` column +(see :ref:`tutorial on selection of data <10min_tut_03_subset>`), add the ``str`` accessor and +apply the ``lower`` method. As such, each of the strings is converted element wise. + +.. raw:: html + +
  • +
+ +Similar to datetime objects in the :ref:`time series tutorial <10min_tut_09_timeseries>` +having a ``dt`` accessor, a number of +specialized string methods are available when using the ``str`` +accessor. These methods have in general matching names with the +equivalent built-in string methods for single elements, but are applied +element-wise (remember :ref:`element wise calculations <10min_tut_05_columns>`?) +on each of the values of the columns. + +.. raw:: html + +
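+
+A short sketch with two more of these element-wise string methods
+(illustrative only, on the same column):
+
+::
+
+    titanic["Name"].str.upper().head()   # uppercase counterpart of lower
+    titanic["Name"].str.strip().head()   # remove leading/trailing whitespace
+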
    +
•
+
+Create a new column ``Surname`` that contains the surname of the passengers by extracting the part before the comma.
+
+.. ipython:: python
+
+    titanic["Name"].str.split(",")
+
+Using the :meth:`Series.str.split` method, each of the values is returned as a list of
+2 elements. The first element is the part before the comma and the
+second element is the part after the comma.
+
+.. ipython:: python
+
+    titanic["Surname"] = titanic["Name"].str.split(",").str.get(0)
+    titanic["Surname"]
+
+As we are only interested in the first part representing the surname
+(element 0), we can again use the ``str`` accessor and apply :meth:`Series.str.get` to
+extract the relevant part. Indeed, these string functions can be
+chained to combine multiple functions at once!
+
+.. raw:: html
+
  • +
+ +.. raw:: html + +
+ To user guide + +More information on extracting parts of strings is available in the user guide section on :ref:`splitting and replacing strings `. + +.. raw:: html + +
+ +.. raw:: html + +
    +
•
+
+Extract the passenger data about the Countess on board of the Titanic.
+
+.. ipython:: python
+
+    titanic["Name"].str.contains("Countess")
+
+.. ipython:: python
+
+    titanic[titanic["Name"].str.contains("Countess")]
+
+(*Interested in her story? See*\ `Wikipedia `__\ *!*)
+
+The string method :meth:`Series.str.contains` checks for each of the values in the
+column ``Name`` if the string contains the word ``Countess`` and returns
+for each of the values ``True`` (``Countess`` is part of the name) or
+``False`` (``Countess`` is not part of the name). This output can be used
+to subselect the data using conditional (boolean) indexing introduced in
+the :ref:`subsetting of data tutorial <10min_tut_03_subset>`. As there was
+only 1 Countess on the Titanic, we get one row as a result.
  • +
+
+.. note::
+    More powerful extractions on strings are supported, as the
+    :meth:`Series.str.contains` and :meth:`Series.str.extract` methods accept `regular
+    expressions `__, but these are out of the
+    scope of this tutorial.
+ To user guide + +More information on extracting parts of strings is available in the user guide section on :ref:`string matching and extracting `. + +.. raw:: html + +
+ +.. raw:: html + +
    +
•
+
+Which passenger of the titanic has the longest name?
+
+.. ipython:: python
+
+    titanic["Name"].str.len()
+
+To get the longest name we first have to get the lengths of each of the
+names in the ``Name`` column. By using pandas string methods, the
+:meth:`Series.str.len` function is applied to each of the names individually
+(element-wise).
+
+.. ipython:: python
+
+    titanic["Name"].str.len().idxmax()
+
+Next, we need to get the corresponding location, preferably the index
+label, in the table for which the name length is the largest. The
+:meth:`~Series.idxmax` method does exactly that. It is not a string method and is
+applied to integers, so no ``str`` is used.
+
+.. ipython:: python
+
+    titanic.loc[titanic["Name"].str.len().idxmax(), "Name"]
+
+Based on the index name of the row (``307``) and the column (``Name``),
+we can do a selection using the ``loc`` operator, introduced in the
+`tutorial on subsetting <3_subset_data.ipynb>`__.
  • +
+ +.. raw:: html + +
    +
•
+
+In the ``Sex`` column, replace values of ‘male’ by ‘M’ and all ‘female’ values by ‘F’
+
+.. ipython:: python
+
+    titanic["Sex_short"] = titanic["Sex"].replace({"male": "M",
+                                                   "female": "F"})
+    titanic["Sex_short"]
+
+Whereas :meth:`~Series.replace` is not a string method, it provides a convenient way
+to use mappings or vocabularies to translate certain values. It requires
+a ``dictionary`` to define the mapping ``{from : to}``.
+
+.. raw:: html
+
  • +
+
+.. warning::
+    There is also a :meth:`~Series.str.replace` method available to replace a
+    specific set of characters. However, when having a mapping of multiple
+    values, this would become:
+
+    ::
+
+        titanic["Sex_short"] = titanic["Sex"].str.replace("female", "F")
+        titanic["Sex_short"] = titanic["Sex_short"].str.replace("male", "M")
+
+    This would become cumbersome and easily lead to mistakes. Just think (or
+    try out yourself) what would happen if those two statements are applied
+    in the opposite order…
+

REMEMBER

+ +- String methods are available using the ``str`` accessor. +- String methods work element wise and can be used for conditional + indexing. +- The ``replace`` method is a convenient method to convert values + according to a given dictionary. + +.. raw:: html + +
+ +.. raw:: html + +
+ To user guide + +A full overview is provided in the user guide pages on :ref:`working with text data `. + +.. raw:: html + +
diff --git a/doc/source/getting_started/intro_tutorials/index.rst b/doc/source/getting_started/intro_tutorials/index.rst new file mode 100644 index 0000000000000..28e7610866461 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/index.rst @@ -0,0 +1,22 @@ +{{ header }} + +.. _10times1minute: + +========================= +Getting started tutorials +========================= + +.. toctree:: + :maxdepth: 1 + + 01_table_oriented + 02_read_write + 03_subset_data + 04_plotting + 05_add_columns + 06_calculate_statistics + 07_reshape_table_layout + 08_combine_dataframes + 09_timeseries + 10_text_data + diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template index 5690bb2e4a875..7eb25790f6a7a 100644 --- a/doc/source/index.rst.template +++ b/doc/source/index.rst.template @@ -104,6 +104,7 @@ programming language. {% if single_doc and single_doc.endswith('.rst') -%} .. toctree:: :maxdepth: 3 + :titlesonly: {{ single_doc[:-4] }} {% elif single_doc %} @@ -115,6 +116,7 @@ programming language. .. toctree:: :maxdepth: 3 :hidden: + :titlesonly: {% endif %} {% if not single_doc %} What's New in 1.1.0 diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst index b28354cd8b5f2..bbec9a770477d 100644 --- a/doc/source/user_guide/reshaping.rst +++ b/doc/source/user_guide/reshaping.rst @@ -6,6 +6,8 @@ Reshaping and pivot tables ************************** +.. _reshaping.reshaping: + Reshaping by pivoting DataFrame objects --------------------------------------- @@ -314,6 +316,8 @@ user-friendly. dft pd.wide_to_long(dft, ["A", "B"], i="id", j="year") +.. _reshaping.combine_with_groupby: + Combining with stats and GroupBy -------------------------------- diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst index 88c86ac212f11..2e4d0fecaf5cf 100644 --- a/doc/source/user_guide/text.rst +++ b/doc/source/user_guide/text.rst @@ -189,12 +189,11 @@ and replacing any remaining whitespaces with underscores: Generally speaking, the ``.str`` accessor is intended to work only on strings. With very few exceptions, other uses are not supported, and may be disabled at a later point. +.. _text.split: Splitting and replacing strings ------------------------------- -.. _text.split: - Methods like ``split`` return a Series of lists: .. ipython:: python From 84444538a88721c5ee74de8836b716d3c1adc4b8 Mon Sep 17 00:00:00 2001 From: Justin Zheng Date: Mon, 17 Feb 2020 08:59:52 -0800 Subject: [PATCH 058/388] BUG: list-like to_replace on Categorical.replace is ignored or crash (#31734) --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/_testing.py | 12 ++++- pandas/core/arrays/categorical.py | 30 ++++++++---- .../tests/arrays/categorical/test_replace.py | 48 +++++++++++++++++++ 4 files changed, 81 insertions(+), 10 deletions(-) create mode 100644 pandas/tests/arrays/categorical/test_replace.py diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 0216007ea5ba8..19358689a2186 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -31,6 +31,7 @@ Bug fixes **Categorical** - Fixed bug where :meth:`Categorical.from_codes` improperly raised a ``ValueError`` when passed nullable integer codes. 
(:issue:`31779`) +- Bug in :class:`Categorical` that would ignore or crash when calling :meth:`Series.replace` with a list-like ``to_replace`` (:issue:`31720`) **I/O** diff --git a/pandas/_testing.py b/pandas/_testing.py index b19905f1c3b5d..7ebf2c282f8c9 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -1074,6 +1074,7 @@ def assert_series_equal( check_exact=False, check_datetimelike_compat=False, check_categorical=True, + check_category_order=True, obj="Series", ): """ @@ -1108,6 +1109,10 @@ def assert_series_equal( Compare datetime-like which is comparable ignoring dtype. check_categorical : bool, default True Whether to compare internal Categorical exactly. + check_category_order : bool, default True + Whether to compare category order of internal Categoricals + + .. versionadded:: 1.0.2 obj : str, default 'Series' Specify object name being compared, internally used to show appropriate assertion message. @@ -1210,7 +1215,12 @@ def assert_series_equal( if check_categorical: if is_categorical_dtype(left) or is_categorical_dtype(right): - assert_categorical_equal(left.values, right.values, obj=f"{obj} category") + assert_categorical_equal( + left.values, + right.values, + obj=f"{obj} category", + check_category_order=check_category_order, + ) # This could be refactored to use the NDFrame.equals method diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 19602010fd882..d469b574820f9 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2441,18 +2441,30 @@ def replace(self, to_replace, value, inplace: bool = False): """ inplace = validate_bool_kwarg(inplace, "inplace") cat = self if inplace else self.copy() - if to_replace in cat.categories: - if isna(value): - cat.remove_categories(to_replace, inplace=True) - else: + + # build a dict of (to replace -> value) pairs + if is_list_like(to_replace): + # if to_replace is list-like and value is scalar + replace_dict = {replace_value: value for replace_value in to_replace} + else: + # if both to_replace and value are scalar + replace_dict = {to_replace: value} + + # other cases, like if both to_replace and value are list-like or if + # to_replace is a dict, are handled separately in NDFrame + for replace_value, new_value in replace_dict.items(): + if replace_value in cat.categories: + if isna(new_value): + cat.remove_categories(replace_value, inplace=True) + continue categories = cat.categories.tolist() - index = categories.index(to_replace) - if value in cat.categories: - value_index = categories.index(value) + index = categories.index(replace_value) + if new_value in cat.categories: + value_index = categories.index(new_value) cat._codes[cat._codes == index] = value_index - cat.remove_categories(to_replace, inplace=True) + cat.remove_categories(replace_value, inplace=True) else: - categories[index] = value + categories[index] = new_value cat.rename_categories(categories, inplace=True) if not inplace: return cat diff --git a/pandas/tests/arrays/categorical/test_replace.py b/pandas/tests/arrays/categorical/test_replace.py new file mode 100644 index 0000000000000..52530123bd52f --- /dev/null +++ b/pandas/tests/arrays/categorical/test_replace.py @@ -0,0 +1,48 @@ +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize( + "to_replace,value,expected,check_types,check_categorical", + [ + # one-to-one + (1, 2, [2, 2, 3], True, True), + (1, 4, [4, 2, 3], True, True), + (4, 1, [1, 2, 3], True, True), + (5, 6, [1, 2, 3], True, True), + 
# many-to-one + ([1], 2, [2, 2, 3], True, True), + ([1, 2], 3, [3, 3, 3], True, True), + ([1, 2], 4, [4, 4, 3], True, True), + ((1, 2, 4), 5, [5, 5, 3], True, True), + ((5, 6), 2, [1, 2, 3], True, True), + # many-to-many, handled outside of Categorical and results in separate dtype + ([1], [2], [2, 2, 3], False, False), + ([1, 4], [5, 2], [5, 2, 3], False, False), + # check_categorical sorts categories, which crashes on mixed dtypes + (3, "4", [1, 2, "4"], True, False), + ([1, 2, "3"], "5", ["5", "5", 3], True, False), + ], +) +def test_replace(to_replace, value, expected, check_types, check_categorical): + # GH 31720 + s = pd.Series([1, 2, 3], dtype="category") + result = s.replace(to_replace, value) + expected = pd.Series(expected, dtype="category") + s.replace(to_replace, value, inplace=True) + tm.assert_series_equal( + expected, + result, + check_dtype=check_types, + check_categorical=check_categorical, + check_category_order=False, + ) + tm.assert_series_equal( + expected, + s, + check_dtype=check_types, + check_categorical=check_categorical, + check_category_order=False, + ) From c81b0ba9292fe04502dbe759e3520c03818167e2 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Mon, 17 Feb 2020 14:50:32 -0500 Subject: [PATCH 059/388] Clean Up C Warnings (#31935) --- pandas/_libs/algos.pyx | 6 +++--- pandas/_libs/hashing.pyx | 4 ++-- pandas/_libs/tslibs/conversion.pyx | 2 +- pandas/_libs/window/aggregations.pyx | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index dd1f38ce3a842..5f3d946a1e024 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -1173,12 +1173,12 @@ ctypedef fused out_t: @cython.boundscheck(False) @cython.wraparound(False) -def diff_2d(ndarray[diff_t, ndim=2] arr, - ndarray[out_t, ndim=2] out, +def diff_2d(diff_t[:, :] arr, + out_t[:, :] out, Py_ssize_t periods, int axis): cdef: Py_ssize_t i, j, sx, sy, start, stop - bint f_contig = arr.flags.f_contiguous + bint f_contig = arr.is_f_contig() # Disable for unsupported dtype combinations, # see https://github.com/cython/cython/issues/2646 diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index 878da670b2f68..2d859db22ea23 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -5,7 +5,7 @@ import cython from libc.stdlib cimport malloc, free import numpy as np -from numpy cimport uint8_t, uint32_t, uint64_t, import_array +from numpy cimport ndarray, uint8_t, uint32_t, uint64_t, import_array import_array() from pandas._libs.util cimport is_nan @@ -15,7 +15,7 @@ DEF dROUNDS = 4 @cython.boundscheck(False) -def hash_object_array(object[:] arr, object key, object encoding='utf8'): +def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'): """ Parameters ---------- diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index bf38fcfb6103c..57b4100fbceb0 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -152,7 +152,7 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool=True): @cython.boundscheck(False) @cython.wraparound(False) -def datetime_to_datetime64(object[:] values): +def datetime_to_datetime64(ndarray[object] values): """ Convert ndarray of datetime-like objects to int64 array representing nanosecond timestamps. 
diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index f675818599b2c..80b9144042041 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -56,7 +56,7 @@ cdef: cdef inline int int_max(int a, int b): return a if a >= b else b cdef inline int int_min(int a, int b): return a if a <= b else b -cdef inline bint is_monotonic_start_end_bounds( +cdef bint is_monotonic_start_end_bounds( ndarray[int64_t, ndim=1] start, ndarray[int64_t, ndim=1] end ): return is_monotonic(start, False)[0] and is_monotonic(end, False)[0] From 92bb4c9517c41503d42571ea697ea2c81f3a189c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 17 Feb 2020 11:53:58 -0800 Subject: [PATCH 060/388] REF: implement unpack_1tuple to clean up Series.__getitem__ (#31906) --- pandas/core/indexers.py | 27 +++++++++++++++++++++++++++ pandas/core/series.py | 35 +++++------------------------------ 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index cadae9da6092f..4fb42fce29e1a 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -270,6 +270,33 @@ def deprecate_ndim_indexing(result): ) +def unpack_1tuple(tup): + """ + If we have a length-1 tuple/list that contains a slice, unpack to just + the slice. + + Notes + ----- + The list case is deprecated. + """ + if len(tup) == 1 and isinstance(tup[0], slice): + # if we don't have a MultiIndex, we may still be able to handle + # a 1-tuple. see test_1tuple_without_multiindex + + if isinstance(tup, list): + # GH#31299 + warnings.warn( + "Indexing with a single-item list containing a " + "slice is deprecated and will raise in a future " + "version. Pass a tuple instead.", + FutureWarning, + stacklevel=3, + ) + + return tup[0] + return tup + + # ----------------------------------------------------------- # Public indexer validation diff --git a/pandas/core/series.py b/pandas/core/series.py index 256586f3d36a1..15fe0bb98b536 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -72,7 +72,7 @@ sanitize_array, ) from pandas.core.generic import NDFrame -from pandas.core.indexers import maybe_convert_indices +from pandas.core.indexers import maybe_convert_indices, unpack_1tuple from pandas.core.indexes.accessors import CombinedDatetimelikeProperties from pandas.core.indexes.api import ( Float64Index, @@ -851,6 +851,8 @@ def __getitem__(self, key): key_is_scalar = is_scalar(key) if key_is_scalar: key = self.index._convert_scalar_indexer(key, kind="getitem") + elif isinstance(key, (list, tuple)): + key = unpack_1tuple(key) if key_is_scalar or isinstance(self.index, MultiIndex): # Otherwise index.get_value will raise InvalidIndexError @@ -893,16 +895,7 @@ def _get_with(self, key): "supported, use the appropriate DataFrame column" ) elif isinstance(key, tuple): - try: - return self._get_values_tuple(key) - except ValueError: - # if we don't have a MultiIndex, we may still be able to handle - # a 1-tuple. see test_1tuple_without_multiindex - if len(key) == 1: - key = key[0] - if isinstance(key, slice): - return self._get_values(key) - raise + return self._get_values_tuple(key) if not isinstance(key, (list, np.ndarray, ExtensionArray, Series, Index)): key = list(key) @@ -924,26 +917,8 @@ def _get_with(self, key): else: return self.iloc[key] - if isinstance(key, (list, tuple)): - # TODO: de-dup with tuple case handled above? 
+ if isinstance(key, list): # handle the dup indexing case GH#4246 - if len(key) == 1 and isinstance(key[0], slice): - # [slice(0, 5, None)] will break if you convert to ndarray, - # e.g. as requested by np.median - # FIXME: hack - if isinstance(key, list): - # GH#31299 - warnings.warn( - "Indexing with a single-item list containing a " - "slice is deprecated and will raise in a future " - "version. Pass a tuple instead.", - FutureWarning, - stacklevel=3, - ) - # TODO: use a message more like numpy's? - key = tuple(key) - return self._get_values(key) - return self.loc[key] return self.reindex(key) From cea40592e61d9451aca6c66108b9d7a38e3f9b6a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 17 Feb 2020 12:01:51 -0800 Subject: [PATCH 061/388] REF: move loc-only methods to loc (#31859) --- pandas/core/indexing.py | 61 +++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 46017377f2b9c..36140d3213ce1 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -577,18 +577,6 @@ def __call__(self, axis=None): new_self.axis = axis return new_self - def _get_label(self, label, axis: int): - if self.ndim == 1: - # for perf reasons we want to try _xs first - # as its basically direct indexing - # but will fail when the index is not present - # see GH5667 - return self.obj._xs(label, axis=axis) - elif isinstance(label, tuple) and isinstance(label[axis], slice): - raise IndexingError("no slices here, handle elsewhere") - - return self.obj._xs(label, axis=axis) - def _get_setitem_indexer(self, key): """ Convert a potentially-label-based key into a positional indexer. @@ -700,23 +688,6 @@ def _convert_tuple(self, key, is_setter: bool = False): keyidx.append(idx) return tuple(keyidx) - def _handle_lowerdim_multi_index_axis0(self, tup: Tuple): - # we have an axis0 multi-index, handle or raise - axis = self.axis or 0 - try: - # fast path for series or for tup devoid of slices - return self._get_label(tup, axis=axis) - except TypeError: - # slices are unhashable - pass - except KeyError as ek: - # raise KeyError if number of indexers match - # else IndexingError will be raised - if len(tup) <= self.obj.index.nlevels and len(tup) > self.ndim: - raise ek - - return None - def _getitem_tuple_same_dim(self, tup: Tuple): """ Index with indexers that should return an object of the same dimension @@ -798,6 +769,9 @@ def _getitem_nested_tuple(self, tup: Tuple): # multi-index dimension, try to see if we have something like # a tuple passed to a series with a multi-index if len(tup) > self.ndim: + if self.name != "loc": + # This should never be reached, but lets be explicit about it + raise ValueError("Too many indices") result = self._handle_lowerdim_multi_index_axis0(tup) if result is not None: return result @@ -1069,6 +1043,35 @@ def _getitem_tuple(self, tup: Tuple): return self._getitem_tuple_same_dim(tup) + def _get_label(self, label, axis: int): + if self.ndim == 1: + # for perf reasons we want to try _xs first + # as its basically direct indexing + # but will fail when the index is not present + # see GH5667 + return self.obj._xs(label, axis=axis) + elif isinstance(label, tuple) and isinstance(label[axis], slice): + raise IndexingError("no slices here, handle elsewhere") + + return self.obj._xs(label, axis=axis) + + def _handle_lowerdim_multi_index_axis0(self, tup: Tuple): + # we have an axis0 multi-index, handle or raise + axis = self.axis or 0 + try: + # fast path for series or for tup 
devoid of slices + return self._get_label(tup, axis=axis) + except TypeError: + # slices are unhashable + pass + except KeyError as ek: + # raise KeyError if number of indexers match + # else IndexingError will be raised + if len(tup) <= self.obj.index.nlevels and len(tup) > self.ndim: + raise ek + + return None + def _getitem_axis(self, key, axis: int): key = item_from_zerodim(key) if is_iterator(key): From 3b4b86b833e7f765ca035cc2067897f8ae89fac2 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Mon, 17 Feb 2020 14:03:05 -0600 Subject: [PATCH 062/388] CLN: Clean reductions/test_reductions.py (#32035) --- pandas/tests/reductions/test_reductions.py | 127 +++++++++++---------- 1 file changed, 66 insertions(+), 61 deletions(-) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 0b312fe2f8990..211d0d52d8357 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -66,60 +66,64 @@ def test_ops(self, opname, obj): expected = expected.astype("M8[ns]").astype("int64") assert result.value == expected - def test_nanops(self): + @pytest.mark.parametrize("opname", ["max", "min"]) + def test_nanops(self, opname, index_or_series): # GH#7261 - for opname in ["max", "min"]: - for klass in [Index, Series]: - arg_op = "arg" + opname if klass is Index else "idx" + opname - - obj = klass([np.nan, 2.0]) - assert getattr(obj, opname)() == 2.0 - - obj = klass([np.nan]) - assert pd.isna(getattr(obj, opname)()) - assert pd.isna(getattr(obj, opname)(skipna=False)) - - obj = klass([], dtype=object) - assert pd.isna(getattr(obj, opname)()) - assert pd.isna(getattr(obj, opname)(skipna=False)) - - obj = klass([pd.NaT, datetime(2011, 11, 1)]) - # check DatetimeIndex monotonic path - assert getattr(obj, opname)() == datetime(2011, 11, 1) - assert getattr(obj, opname)(skipna=False) is pd.NaT - - assert getattr(obj, arg_op)() == 1 - result = getattr(obj, arg_op)(skipna=False) - if klass is Series: - assert np.isnan(result) - else: - assert result == -1 - - obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT]) - # check DatetimeIndex non-monotonic path - assert getattr(obj, opname)(), datetime(2011, 11, 1) - assert getattr(obj, opname)(skipna=False) is pd.NaT - - assert getattr(obj, arg_op)() == 1 - result = getattr(obj, arg_op)(skipna=False) - if klass is Series: - assert np.isnan(result) - else: - assert result == -1 - - for dtype in ["M8[ns]", "datetime64[ns, UTC]"]: - # cases with empty Series/DatetimeIndex - obj = klass([], dtype=dtype) - - assert getattr(obj, opname)() is pd.NaT - assert getattr(obj, opname)(skipna=False) is pd.NaT - - with pytest.raises(ValueError, match="empty sequence"): - getattr(obj, arg_op)() - with pytest.raises(ValueError, match="empty sequence"): - getattr(obj, arg_op)(skipna=False) - - # argmin/max + klass = index_or_series + arg_op = "arg" + opname if klass is Index else "idx" + opname + + obj = klass([np.nan, 2.0]) + assert getattr(obj, opname)() == 2.0 + + obj = klass([np.nan]) + assert pd.isna(getattr(obj, opname)()) + assert pd.isna(getattr(obj, opname)(skipna=False)) + + obj = klass([], dtype=object) + assert pd.isna(getattr(obj, opname)()) + assert pd.isna(getattr(obj, opname)(skipna=False)) + + obj = klass([pd.NaT, datetime(2011, 11, 1)]) + # check DatetimeIndex monotonic path + assert getattr(obj, opname)() == datetime(2011, 11, 1) + assert getattr(obj, opname)(skipna=False) is pd.NaT + + assert getattr(obj, arg_op)() == 1 + result = 
getattr(obj, arg_op)(skipna=False) + if klass is Series: + assert np.isnan(result) + else: + assert result == -1 + + obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT]) + # check DatetimeIndex non-monotonic path + assert getattr(obj, opname)(), datetime(2011, 11, 1) + assert getattr(obj, opname)(skipna=False) is pd.NaT + + assert getattr(obj, arg_op)() == 1 + result = getattr(obj, arg_op)(skipna=False) + if klass is Series: + assert np.isnan(result) + else: + assert result == -1 + + @pytest.mark.parametrize("opname", ["max", "min"]) + @pytest.mark.parametrize("dtype", ["M8[ns]", "datetime64[ns, UTC]"]) + def test_nanops_empty_object(self, opname, index_or_series, dtype): + klass = index_or_series + arg_op = "arg" + opname if klass is Index else "idx" + opname + + obj = klass([], dtype=dtype) + + assert getattr(obj, opname)() is pd.NaT + assert getattr(obj, opname)(skipna=False) is pd.NaT + + with pytest.raises(ValueError, match="empty sequence"): + getattr(obj, arg_op)() + with pytest.raises(ValueError, match="empty sequence"): + getattr(obj, arg_op)(skipna=False) + + def test_argminmax(self): obj = Index(np.arange(5, dtype="int64")) assert obj.argmin() == 0 assert obj.argmax() == 4 @@ -224,16 +228,17 @@ def test_minmax_timedelta64(self): assert idx.argmin() == 0 assert idx.argmax() == 2 - for op in ["min", "max"]: - # Return NaT - obj = TimedeltaIndex([]) - assert pd.isna(getattr(obj, op)()) + @pytest.mark.parametrize("op", ["min", "max"]) + def test_minmax_timedelta_empty_or_na(self, op): + # Return NaT + obj = TimedeltaIndex([]) + assert getattr(obj, op)() is pd.NaT - obj = TimedeltaIndex([pd.NaT]) - assert pd.isna(getattr(obj, op)()) + obj = TimedeltaIndex([pd.NaT]) + assert getattr(obj, op)() is pd.NaT - obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT]) - assert pd.isna(getattr(obj, op)()) + obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT]) + assert getattr(obj, op)() is pd.NaT def test_numpy_minmax_timedelta64(self): td = timedelta_range("16815 days", "16820 days", freq="D") From 7b0887c2ea7255139be1cc16a179e0b4574384d2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 17 Feb 2020 15:35:21 -0600 Subject: [PATCH 063/388] DOC: pin gitdb2 (#32064) --- environment.yml | 1 + requirements-dev.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/environment.yml b/environment.yml index 5f1184e921119..cbdaf8e6c4217 100644 --- a/environment.yml +++ b/environment.yml @@ -26,6 +26,7 @@ dependencies: # documentation - gitpython # obtain contributors from git for whatsnew + - gitdb2=2.0.6 # GH-32060 - sphinx # documentation (jupyter notebooks) diff --git a/requirements-dev.txt b/requirements-dev.txt index 08cbef2c7fc6b..a469cbdd93ceb 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -15,6 +15,7 @@ isort mypy==0.730 pycodestyle gitpython +gitdb2==2.0.6 sphinx nbconvert>=5.4.1 nbsphinx From 3da053c7d1b1786689a81b143a7afb57eb4762ad Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 17 Feb 2020 16:15:45 -0800 Subject: [PATCH 064/388] BUG: fix length_of_indexer with boolean mask (#31897) --- pandas/core/indexers.py | 8 +++++++- pandas/core/indexing.py | 28 +++++++------------------- pandas/tests/indexing/test_indexers.py | 11 ++++++++++ 3 files changed, 25 insertions(+), 22 deletions(-) create mode 100644 pandas/tests/indexing/test_indexers.py diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index 4fb42fce29e1a..cb48d4be75c4d 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -219,7 +219,7 @@ def maybe_convert_indices(indices, n: int): def 
length_of_indexer(indexer, target=None) -> int: """ - Return the length of a single non-tuple indexer which could be a slice. + Return the expected length of target[indexer] Returns ------- @@ -245,6 +245,12 @@ def length_of_indexer(indexer, target=None) -> int: step = -step return (stop - start + step - 1) // step elif isinstance(indexer, (ABCSeries, ABCIndexClass, np.ndarray, list)): + if isinstance(indexer, list): + indexer = np.array(indexer) + + if indexer.dtype == bool: + # GH#25774 + return indexer.sum() return len(indexer) elif not is_list_like_indexer(indexer): return 1 diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 36140d3213ce1..d3e75d43c6bd7 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1690,31 +1690,17 @@ def _setitem_with_indexer(self, indexer, value): plane_indexer = tuple([idx]) + indexer[info_axis + 1 :] lplane_indexer = length_of_indexer(plane_indexer[0], index) + # lplane_indexer gives the expected length of obj[idx] # require that we are setting the right number of values that # we are indexing - if ( - is_list_like_indexer(value) - and np.iterable(value) - and lplane_indexer != len(value) - ): - - if len(obj[idx]) != len(value): - raise ValueError( - "cannot set using a multi-index " - "selection indexer with a different " - "length than the value" - ) - - # make sure we have an ndarray - value = getattr(value, "values", value).ravel() + if is_list_like_indexer(value) and lplane_indexer != len(value): - # we can directly set the series here - obj._consolidate_inplace() - obj = obj.copy() - obj._data = obj._data.setitem(indexer=tuple([idx]), value=value) - self.obj[item] = obj - return + raise ValueError( + "cannot set using a multi-index " + "selection indexer with a different " + "length than the value" + ) # non-mi else: diff --git a/pandas/tests/indexing/test_indexers.py b/pandas/tests/indexing/test_indexers.py new file mode 100644 index 0000000000000..173f33b19f8d5 --- /dev/null +++ b/pandas/tests/indexing/test_indexers.py @@ -0,0 +1,11 @@ +# Tests aimed at pandas.core.indexers +import numpy as np + +from pandas.core.indexers import length_of_indexer + + +def test_length_of_indexer(): + arr = np.zeros(4, dtype=bool) + arr[0] = 1 + result = length_of_indexer(arr) + assert result == 1 From ebeb407bfc608d1fcf1e1921747441765d569589 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 18 Feb 2020 09:16:32 +0100 Subject: [PATCH 065/388] WEB: update blog link to only include my pandas blog posts (#32051) --- web/pandas/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/pandas/config.yml b/web/pandas/config.yml index 83eb152c9d944..a52c580f23530 100644 --- a/web/pandas/config.yml +++ b/web/pandas/config.yml @@ -54,7 +54,7 @@ blog: - https://dev.pandas.io/pandas-blog/feeds/all.atom.xml - https://wesmckinney.com/feeds/pandas.atom.xml - https://tomaugspurger.github.io/feed - - https://jorisvandenbossche.github.io/feeds/all.atom.xml + - https://jorisvandenbossche.github.io/feeds/pandas.atom.xml - https://datapythonista.github.io/blog/feeds/pandas.atom.xml - https://numfocus.org/tag/pandas/feed/ maintainers: From bdc7fd06f6e48450498873bc89a42955dc5d89d9 Mon Sep 17 00:00:00 2001 From: Patrick Cando <32943309+pcandoalmeida@users.noreply.github.com> Date: Tue, 18 Feb 2020 11:56:21 +0000 Subject: [PATCH 066/388] CLN: GH29547 replace old string formatting (#32063) --- pandas/tests/tslibs/test_parsing.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index c452d5b12ce01..1ba7832c47e6c 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -42,9 +42,9 @@ def test_parse_time_quarter_with_dash(dashed, normal): @pytest.mark.parametrize("dashed", ["-2Q1992", "2-Q1992", "4-4Q1992"]) def test_parse_time_quarter_with_dash_error(dashed): - msg = "Unknown datetime string format, unable to parse: {dashed}" + msg = f"Unknown datetime string format, unable to parse: {dashed}" - with pytest.raises(parsing.DateParseError, match=msg.format(dashed=dashed)): + with pytest.raises(parsing.DateParseError, match=msg): parse_time_string(dashed) @@ -115,12 +115,12 @@ def test_parsers_quarter_invalid(date_str): if date_str == "6Q-20": msg = ( "Incorrect quarterly string is given, quarter " - "must be between 1 and 4: {date_str}" + f"must be between 1 and 4: {date_str}" ) else: - msg = "Unknown datetime string format, unable to parse: {date_str}" + msg = f"Unknown datetime string format, unable to parse: {date_str}" - with pytest.raises(ValueError, match=msg.format(date_str=date_str)): + with pytest.raises(ValueError, match=msg): parsing.parse_time_string(date_str) From 9c06b30f9e6bc0c2ed15b851c85ae4077a189313 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 18 Feb 2020 04:38:28 -0800 Subject: [PATCH 067/388] CLN: remove unused from MultiIndex (#32030) --- pandas/core/indexes/multi.py | 67 +------------------------------ pandas/tests/util/test_hashing.py | 17 -------- 2 files changed, 1 insertion(+), 83 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 02e11c0e71cbe..0a79df0cc9744 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -58,7 +58,6 @@ indexer_from_factorized, lexsort_indexer, ) -from pandas.core.util.hashing import hash_tuple, hash_tuples from pandas.io.formats.printing import ( format_object_attrs, @@ -247,6 +246,7 @@ class MultiIndex(Index): rename = Index.set_names _tuples = None + sortorder: Optional[int] # -------------------------------------------------------------------- # Constructors @@ -1430,55 +1430,11 @@ def is_monotonic_decreasing(self) -> bool: # monotonic decreasing if and only if reverse is monotonic increasing return self[::-1].is_monotonic_increasing - @cache_readonly - def _have_mixed_levels(self): - """ return a boolean list indicated if we have mixed levels """ - return ["mixed" in l for l in self._inferred_type_levels] - @cache_readonly def _inferred_type_levels(self): """ return a list of the inferred types, one for each level """ return [i.inferred_type for i in self.levels] - @cache_readonly - def _hashed_values(self): - """ return a uint64 ndarray of my hashed values """ - return hash_tuples(self) - - def _hashed_indexing_key(self, key): - """ - validate and return the hash for the provided key - - *this is internal for use for the cython routines* - - Parameters - ---------- - key : string or tuple - - Returns - ------- - np.uint64 - - Notes - ----- - we need to stringify if we have mixed levels - """ - if not isinstance(key, tuple): - return hash_tuples(key) - - if not len(key) == self.nlevels: - raise KeyError - - def f(k, stringify): - if stringify and not isinstance(k, str): - k = str(k) - return k - - key = tuple( - f(k, stringify) for k, stringify in zip(key, self._have_mixed_levels) - ) - return hash_tuple(key) - @Appender(Index.duplicated.__doc__) def duplicated(self, keep="first"): shape = map(len, self.levels) @@ -1858,27 
+1814,6 @@ def __reduce__(self): ) return ibase._new_Index, (type(self), d), None - def __setstate__(self, state): - """Necessary for making this object picklable""" - if isinstance(state, dict): - levels = state.get("levels") - codes = state.get("codes") - sortorder = state.get("sortorder") - names = state.get("names") - - elif isinstance(state, tuple): - - nd_state, own_state = state - levels, codes, sortorder, names = own_state - - self._set_levels([Index(x) for x in levels], validate=False) - self._set_codes(codes) - new_codes = self._verify_integrity() - self._set_codes(new_codes) - self._set_names(names) - self.sortorder = sortorder - self._reset_identity() - # -------------------------------------------------------------------- def __getitem__(self, key): diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index c856585f20138..6411b9ab654f1 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -178,23 +178,6 @@ def test_multiindex_objects(): assert mi.equals(recons) assert Index(mi.values).equals(Index(recons.values)) - # _hashed_values and hash_pandas_object(..., index=False) equivalency. - expected = hash_pandas_object(mi, index=False).values - result = mi._hashed_values - - tm.assert_numpy_array_equal(result, expected) - - expected = hash_pandas_object(recons, index=False).values - result = recons._hashed_values - - tm.assert_numpy_array_equal(result, expected) - - expected = mi._hashed_values - result = recons._hashed_values - - # Values should match, but in different order. - tm.assert_numpy_array_equal(np.sort(result), np.sort(expected)) - @pytest.mark.parametrize( "obj", From f4dc9f9028a3da539126f9a8a37e7c41fc7b4b3c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 18 Feb 2020 16:26:27 -0800 Subject: [PATCH 068/388] REGR: fix op(frame, frame2) with reindex (#31679) --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/core/ops/__init__.py | 60 ++++++++++++++++++++++++++- pandas/tests/frame/test_arithmetic.py | 19 +++++++++ 3 files changed, 79 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 19358689a2186..c9031ac1ae9fe 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -19,6 +19,7 @@ Fixed regressions - Fixed regression in :meth:`Series.align` when ``other`` is a DataFrame and ``method`` is not None (:issue:`31785`) - Fixed regression in :meth:`pandas.core.groupby.RollingGroupby.apply` where the ``raw`` parameter was ignored (:issue:`31754`) - Fixed regression in :meth:`rolling(..).corr() ` when using a time offset (:issue:`31789`) +- Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index f3c1a609d50a1..b74dea686a89f 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -5,7 +5,7 @@ """ import datetime import operator -from typing import Optional, Set, Tuple, Union +from typing import TYPE_CHECKING, Optional, Set, Tuple, Union import numpy as np @@ -61,6 +61,9 @@ rxor, ) +if TYPE_CHECKING: + from pandas import DataFrame # noqa:F401 + # ----------------------------------------------------------------------------- # constants ARITHMETIC_BINOPS: Set[str] = { @@ -703,6 +706,58 @@ def to_series(right): return left, right +def _should_reindex_frame_op( + left: "DataFrame", right, axis, default_axis: int, fill_value, level +) -> bool: + """ + Check if this is an operation between DataFrames that will need to reindex. + """ + assert isinstance(left, ABCDataFrame) + + if not isinstance(right, ABCDataFrame): + return False + + if fill_value is None and level is None and axis is default_axis: + # TODO: any other cases we should handle here? + cols = left.columns.intersection(right.columns) + if not (cols.equals(left.columns) and cols.equals(right.columns)): + return True + + return False + + +def _frame_arith_method_with_reindex( + left: "DataFrame", right: "DataFrame", op +) -> "DataFrame": + """ + For DataFrame-with-DataFrame operations that require reindexing, + operate only on shared columns, then reindex. + + Parameters + ---------- + left : DataFrame + right : DataFrame + op : binary operator + + Returns + ------- + DataFrame + """ + # GH#31623, only operate on shared columns + cols = left.columns.intersection(right.columns) + + new_left = left[cols] + new_right = right[cols] + result = op(new_left, new_right) + + # Do the join on the columns instead of using _align_method_FRAME + # to avoid constructing two potentially large/sparse DataFrames + join_columns, _, _ = left.columns.join( + right.columns, how="outer", level=None, return_indexers=True + ) + return result.reindex(join_columns, axis=1) + + def _arith_method_FRAME(cls, op, special): str_rep = _get_opstr(op) op_name = _get_op_name(op, special) @@ -720,6 +775,9 @@ def _arith_method_FRAME(cls, op, special): @Appender(doc) def f(self, other, axis=default_axis, level=None, fill_value=None): + if _should_reindex_frame_op(self, other, axis, default_axis, fill_value, level): + return _frame_arith_method_with_reindex(self, other, op) + self, other = _align_method_FRAME(self, other, axis, flex=True, level=level) if isinstance(other, ABCDataFrame): diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index c6eacf2bbcd84..44ad55517dcea 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -711,6 +711,25 @@ def test_operations_with_interval_categories_index(self, all_arithmetic_operator expected = pd.DataFrame([[getattr(n, op)(num) for n in data]], columns=ind) tm.assert_frame_equal(result, expected) + def test_frame_with_frame_reindex(self): + # GH#31623 + df = pd.DataFrame( + { + "foo": [pd.Timestamp("2019"), pd.Timestamp("2020")], + "bar": [pd.Timestamp("2018"), pd.Timestamp("2021")], + }, + columns=["foo", "bar"], + ) + df2 = df[["foo"]] + + result = df - df2 + + expected = pd.DataFrame( + {"foo": [pd.Timedelta(0), pd.Timedelta(0)], "bar": [np.nan, np.nan]}, + columns=["bar", "foo"], + ) + tm.assert_frame_equal(result, expected) + def 
test_frame_with_zero_len_series_corner_cases(): # GH#28600 From aa1089f5568927bd660a6b5d824b20d456703929 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 19 Feb 2020 00:45:02 +0000 Subject: [PATCH 069/388] CLN: Move info (#31876) --- pandas/core/frame.py | 282 +------------------ pandas/io/formats/info.py | 288 +++++++++++++++++++ pandas/tests/frame/test_repr_info.py | 357 ----------------------- pandas/tests/io/formats/test_info.py | 405 +++++++++++++++++++++++++++ 4 files changed, 696 insertions(+), 636 deletions(-) create mode 100644 pandas/io/formats/info.py create mode 100644 pandas/tests/io/formats/test_info.py diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4ad80273f77ba..e4f36e128059b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -14,7 +14,6 @@ import datetime from io import StringIO import itertools -import sys from textwrap import dedent from typing import ( IO, @@ -131,7 +130,7 @@ from pandas.io.common import get_filepath_or_buffer from pandas.io.formats import console, format as fmt -from pandas.io.formats.printing import pprint_thing +from pandas.io.formats.info import info import pandas.plotting if TYPE_CHECKING: @@ -2225,282 +2224,11 @@ def to_html( ) # ---------------------------------------------------------------------- - + @Appender(info.__doc__) def info( self, verbose=None, buf=None, max_cols=None, memory_usage=None, null_counts=None ) -> None: - """ - Print a concise summary of a DataFrame. - - This method prints information about a DataFrame including - the index dtype and column dtypes, non-null values and memory usage. - - Parameters - ---------- - verbose : bool, optional - Whether to print the full summary. By default, the setting in - ``pandas.options.display.max_info_columns`` is followed. - buf : writable buffer, defaults to sys.stdout - Where to send the output. By default, the output is printed to - sys.stdout. Pass a writable buffer if you need to further process - the output. - max_cols : int, optional - When to switch from the verbose to the truncated output. If the - DataFrame has more than `max_cols` columns, the truncated output - is used. By default, the setting in - ``pandas.options.display.max_info_columns`` is used. - memory_usage : bool, str, optional - Specifies whether total memory usage of the DataFrame - elements (including the index) should be displayed. By default, - this follows the ``pandas.options.display.memory_usage`` setting. - - True always show memory usage. False never shows memory usage. - A value of 'deep' is equivalent to "True with deep introspection". - Memory usage is shown in human-readable units (base-2 - representation). Without deep introspection a memory estimation is - made based in column dtype and number of rows assuming values - consume the same memory amount for corresponding dtypes. With deep - memory introspection, a real memory usage calculation is performed - at the cost of computational resources. - null_counts : bool, optional - Whether to show the non-null counts. By default, this is shown - only if the frame is smaller than - ``pandas.options.display.max_info_rows`` and - ``pandas.options.display.max_info_columns``. A value of True always - shows the counts, and False never shows the counts. - - Returns - ------- - None - This method prints a summary of a DataFrame and returns None. - - See Also - -------- - DataFrame.describe: Generate descriptive statistics of DataFrame - columns. 
- DataFrame.memory_usage: Memory usage of DataFrame columns. - - Examples - -------- - >>> int_values = [1, 2, 3, 4, 5] - >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon'] - >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0] - >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values, - ... "float_col": float_values}) - >>> df - int_col text_col float_col - 0 1 alpha 0.00 - 1 2 beta 0.25 - 2 3 gamma 0.50 - 3 4 delta 0.75 - 4 5 epsilon 1.00 - - Prints information of all columns: - - >>> df.info(verbose=True) - - RangeIndex: 5 entries, 0 to 4 - Data columns (total 3 columns): - # Column Non-Null Count Dtype - --- ------ -------------- ----- - 0 int_col 5 non-null int64 - 1 text_col 5 non-null object - 2 float_col 5 non-null float64 - dtypes: float64(1), int64(1), object(1) - memory usage: 248.0+ bytes - - Prints a summary of columns count and its dtypes but not per column - information: - - >>> df.info(verbose=False) - - RangeIndex: 5 entries, 0 to 4 - Columns: 3 entries, int_col to float_col - dtypes: float64(1), int64(1), object(1) - memory usage: 248.0+ bytes - - Pipe output of DataFrame.info to buffer instead of sys.stdout, get - buffer content and writes to a text file: - - >>> import io - >>> buffer = io.StringIO() - >>> df.info(buf=buffer) - >>> s = buffer.getvalue() - >>> with open("df_info.txt", "w", - ... encoding="utf-8") as f: # doctest: +SKIP - ... f.write(s) - 260 - - The `memory_usage` parameter allows deep introspection mode, specially - useful for big DataFrames and fine-tune memory optimization: - - >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6) - >>> df = pd.DataFrame({ - ... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6), - ... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6), - ... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6) - ... 
}) - >>> df.info() - - RangeIndex: 1000000 entries, 0 to 999999 - Data columns (total 3 columns): - # Column Non-Null Count Dtype - --- ------ -------------- ----- - 0 column_1 1000000 non-null object - 1 column_2 1000000 non-null object - 2 column_3 1000000 non-null object - dtypes: object(3) - memory usage: 22.9+ MB - - >>> df.info(memory_usage='deep') - - RangeIndex: 1000000 entries, 0 to 999999 - Data columns (total 3 columns): - # Column Non-Null Count Dtype - --- ------ -------------- ----- - 0 column_1 1000000 non-null object - 1 column_2 1000000 non-null object - 2 column_3 1000000 non-null object - dtypes: object(3) - memory usage: 188.8 MB - """ - if buf is None: # pragma: no cover - buf = sys.stdout - - lines = [] - - lines.append(str(type(self))) - lines.append(self.index._summary()) - - if len(self.columns) == 0: - lines.append(f"Empty {type(self).__name__}") - fmt.buffer_put_lines(buf, lines) - return - - cols = self.columns - col_count = len(self.columns) - - # hack - if max_cols is None: - max_cols = get_option("display.max_info_columns", len(self.columns) + 1) - - max_rows = get_option("display.max_info_rows", len(self) + 1) - - if null_counts is None: - show_counts = (col_count <= max_cols) and (len(self) < max_rows) - else: - show_counts = null_counts - exceeds_info_cols = col_count > max_cols - - def _verbose_repr(): - lines.append(f"Data columns (total {len(self.columns)} columns):") - - id_head = " # " - column_head = "Column" - col_space = 2 - - max_col = max(len(pprint_thing(k)) for k in cols) - len_column = len(pprint_thing(column_head)) - space = max(max_col, len_column) + col_space - - max_id = len(pprint_thing(col_count)) - len_id = len(pprint_thing(id_head)) - space_num = max(max_id, len_id) + col_space - counts = None - - header = _put_str(id_head, space_num) + _put_str(column_head, space) - if show_counts: - counts = self.count() - if len(cols) != len(counts): # pragma: no cover - raise AssertionError( - f"Columns must equal counts ({len(cols)} != {len(counts)})" - ) - count_header = "Non-Null Count" - len_count = len(count_header) - non_null = " non-null" - max_count = max(len(pprint_thing(k)) for k in counts) + len(non_null) - space_count = max(len_count, max_count) + col_space - count_temp = "{count}" + non_null - else: - count_header = "" - space_count = len(count_header) - len_count = space_count - count_temp = "{count}" - - dtype_header = "Dtype" - len_dtype = len(dtype_header) - max_dtypes = max(len(pprint_thing(k)) for k in self.dtypes) - space_dtype = max(len_dtype, max_dtypes) - header += _put_str(count_header, space_count) + _put_str( - dtype_header, space_dtype - ) - - lines.append(header) - lines.append( - _put_str("-" * len_id, space_num) - + _put_str("-" * len_column, space) - + _put_str("-" * len_count, space_count) - + _put_str("-" * len_dtype, space_dtype) - ) - - for i, col in enumerate(self.columns): - dtype = self.dtypes.iloc[i] - col = pprint_thing(col) - - line_no = _put_str(f" {i}", space_num) - count = "" - if show_counts: - count = counts.iloc[i] - - lines.append( - line_no - + _put_str(col, space) - + _put_str(count_temp.format(count=count), space_count) - + _put_str(dtype, space_dtype) - ) - - def _non_verbose_repr(): - lines.append(self.columns._summary(name="Columns")) - - def _sizeof_fmt(num, size_qualifier): - # returns size in human readable format - for x in ["bytes", "KB", "MB", "GB", "TB"]: - if num < 1024.0: - return f"{num:3.1f}{size_qualifier} {x}" - num /= 1024.0 - return f"{num:3.1f}{size_qualifier} PB" - - if verbose: 
- _verbose_repr() - elif verbose is False: # specifically set to False, not nesc None - _non_verbose_repr() - else: - if exceeds_info_cols: - _non_verbose_repr() - else: - _verbose_repr() - - counts = self._data.get_dtype_counts() - dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())] - lines.append(f"dtypes: {', '.join(dtypes)}") - - if memory_usage is None: - memory_usage = get_option("display.memory_usage") - if memory_usage: - # append memory usage of df to display - size_qualifier = "" - if memory_usage == "deep": - deep = True - else: - # size_qualifier is just a best effort; not guaranteed to catch - # all cases (e.g., it misses categorical data even with object - # categories) - deep = False - if "object" in counts or self.index._is_memory_usage_qualified(): - size_qualifier = "+" - mem_usage = self.memory_usage(index=True, deep=deep).sum() - lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n") - fmt.buffer_put_lines(buf, lines) + return info(self, verbose, buf, max_cols, memory_usage, null_counts) def memory_usage(self, index=True, deep=False) -> Series: """ @@ -8623,7 +8351,3 @@ def _from_nested_dict(data): new_data[col] = new_data.get(col, {}) new_data[col][index] = v return new_data - - -def _put_str(s, space): - return str(s)[:space].ljust(space) diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py new file mode 100644 index 0000000000000..0c08065f55273 --- /dev/null +++ b/pandas/io/formats/info.py @@ -0,0 +1,288 @@ +import sys + +from pandas._config import get_option + +from pandas.io.formats import format as fmt +from pandas.io.formats.printing import pprint_thing + + +def _put_str(s, space): + return str(s)[:space].ljust(space) + + +def info( + data, verbose=None, buf=None, max_cols=None, memory_usage=None, null_counts=None +) -> None: + """ + Print a concise summary of a DataFrame. + + This method prints information about a DataFrame including + the index dtype and column dtypes, non-null values and memory usage. + + Parameters + ---------- + data : DataFrame + DataFrame to print information about. + verbose : bool, optional + Whether to print the full summary. By default, the setting in + ``pandas.options.display.max_info_columns`` is followed. + buf : writable buffer, defaults to sys.stdout + Where to send the output. By default, the output is printed to + sys.stdout. Pass a writable buffer if you need to further process + the output. + max_cols : int, optional + When to switch from the verbose to the truncated output. If the + DataFrame has more than `max_cols` columns, the truncated output + is used. By default, the setting in + ``pandas.options.display.max_info_columns`` is used. + memory_usage : bool, str, optional + Specifies whether total memory usage of the DataFrame + elements (including the index) should be displayed. By default, + this follows the ``pandas.options.display.memory_usage`` setting. + + True always show memory usage. False never shows memory usage. + A value of 'deep' is equivalent to "True with deep introspection". + Memory usage is shown in human-readable units (base-2 + representation). Without deep introspection a memory estimation is + made based in column dtype and number of rows assuming values + consume the same memory amount for corresponding dtypes. With deep + memory introspection, a real memory usage calculation is performed + at the cost of computational resources. + null_counts : bool, optional + Whether to show the non-null counts. 
By default, this is shown + only if the frame is smaller than + ``pandas.options.display.max_info_rows`` and + ``pandas.options.display.max_info_columns``. A value of True always + shows the counts, and False never shows the counts. + + Returns + ------- + None + This method prints a summary of a DataFrame and returns None. + + See Also + -------- + DataFrame.describe: Generate descriptive statistics of DataFrame + columns. + DataFrame.memory_usage: Memory usage of DataFrame columns. + + Examples + -------- + >>> int_values = [1, 2, 3, 4, 5] + >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon'] + >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0] + >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values, + ... "float_col": float_values}) + >>> df + int_col text_col float_col + 0 1 alpha 0.00 + 1 2 beta 0.25 + 2 3 gamma 0.50 + 3 4 delta 0.75 + 4 5 epsilon 1.00 + + Prints information of all columns: + + >>> df.info(verbose=True) + + RangeIndex: 5 entries, 0 to 4 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 int_col 5 non-null int64 + 1 text_col 5 non-null object + 2 float_col 5 non-null float64 + dtypes: float64(1), int64(1), object(1) + memory usage: 248.0+ bytes + + Prints a summary of columns count and its dtypes but not per column + information: + + >>> df.info(verbose=False) + + RangeIndex: 5 entries, 0 to 4 + Columns: 3 entries, int_col to float_col + dtypes: float64(1), int64(1), object(1) + memory usage: 248.0+ bytes + + Pipe output of DataFrame.info to buffer instead of sys.stdout, get + buffer content and writes to a text file: + + >>> import io + >>> buffer = io.StringIO() + >>> df.info(buf=buffer) + >>> s = buffer.getvalue() + >>> with open("df_info.txt", "w", + ... encoding="utf-8") as f: # doctest: +SKIP + ... f.write(s) + 260 + + The `memory_usage` parameter allows deep introspection mode, specially + useful for big DataFrames and fine-tune memory optimization: + + >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6) + >>> df = pd.DataFrame({ + ... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6), + ... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6), + ... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6) + ... 
}) + >>> df.info() + + RangeIndex: 1000000 entries, 0 to 999999 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 column_1 1000000 non-null object + 1 column_2 1000000 non-null object + 2 column_3 1000000 non-null object + dtypes: object(3) + memory usage: 22.9+ MB + + >>> df.info(memory_usage='deep') + + RangeIndex: 1000000 entries, 0 to 999999 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 column_1 1000000 non-null object + 1 column_2 1000000 non-null object + 2 column_3 1000000 non-null object + dtypes: object(3) + memory usage: 188.8 MB + """ + if buf is None: # pragma: no cover + buf = sys.stdout + + lines = [] + + lines.append(str(type(data))) + lines.append(data.index._summary()) + + if len(data.columns) == 0: + lines.append(f"Empty {type(data).__name__}") + fmt.buffer_put_lines(buf, lines) + return + + cols = data.columns + col_count = len(data.columns) + + # hack + if max_cols is None: + max_cols = get_option("display.max_info_columns", len(data.columns) + 1) + + max_rows = get_option("display.max_info_rows", len(data) + 1) + + if null_counts is None: + show_counts = (col_count <= max_cols) and (len(data) < max_rows) + else: + show_counts = null_counts + exceeds_info_cols = col_count > max_cols + + def _verbose_repr(): + lines.append(f"Data columns (total {len(data.columns)} columns):") + + id_head = " # " + column_head = "Column" + col_space = 2 + + max_col = max(len(pprint_thing(k)) for k in cols) + len_column = len(pprint_thing(column_head)) + space = max(max_col, len_column) + col_space + + max_id = len(pprint_thing(col_count)) + len_id = len(pprint_thing(id_head)) + space_num = max(max_id, len_id) + col_space + + header = _put_str(id_head, space_num) + _put_str(column_head, space) + if show_counts: + counts = data.count() + if len(cols) != len(counts): # pragma: no cover + raise AssertionError( + f"Columns must equal counts ({len(cols)} != {len(counts)})" + ) + count_header = "Non-Null Count" + len_count = len(count_header) + non_null = " non-null" + max_count = max(len(pprint_thing(k)) for k in counts) + len(non_null) + space_count = max(len_count, max_count) + col_space + count_temp = "{count}" + non_null + else: + count_header = "" + space_count = len(count_header) + len_count = space_count + count_temp = "{count}" + + dtype_header = "Dtype" + len_dtype = len(dtype_header) + max_dtypes = max(len(pprint_thing(k)) for k in data.dtypes) + space_dtype = max(len_dtype, max_dtypes) + header += _put_str(count_header, space_count) + _put_str( + dtype_header, space_dtype + ) + + lines.append(header) + lines.append( + _put_str("-" * len_id, space_num) + + _put_str("-" * len_column, space) + + _put_str("-" * len_count, space_count) + + _put_str("-" * len_dtype, space_dtype) + ) + + for i, col in enumerate(data.columns): + dtype = data.dtypes.iloc[i] + col = pprint_thing(col) + + line_no = _put_str(f" {i}", space_num) + count = "" + if show_counts: + count = counts.iloc[i] + + lines.append( + line_no + + _put_str(col, space) + + _put_str(count_temp.format(count=count), space_count) + + _put_str(dtype, space_dtype) + ) + + def _non_verbose_repr(): + lines.append(data.columns._summary(name="Columns")) + + def _sizeof_fmt(num, size_qualifier): + # returns size in human readable format + for x in ["bytes", "KB", "MB", "GB", "TB"]: + if num < 1024.0: + return f"{num:3.1f}{size_qualifier} {x}" + num /= 1024.0 + return f"{num:3.1f}{size_qualifier} PB" + + if verbose: + 
_verbose_repr() + elif verbose is False: # specifically set to False, not nesc None + _non_verbose_repr() + else: + if exceeds_info_cols: + _non_verbose_repr() + else: + _verbose_repr() + + counts = data._data.get_dtype_counts() + dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())] + lines.append(f"dtypes: {', '.join(dtypes)}") + + if memory_usage is None: + memory_usage = get_option("display.memory_usage") + if memory_usage: + # append memory usage of df to display + size_qualifier = "" + if memory_usage == "deep": + deep = True + else: + # size_qualifier is just a best effort; not guaranteed to catch + # all cases (e.g., it misses categorical data even with object + # categories) + deep = False + if "object" in counts or data.index._is_memory_usage_qualified(): + size_qualifier = "+" + mem_usage = data.memory_usage(index=True, deep=deep).sum() + lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n") + fmt.buffer_put_lines(buf, lines) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 4ac009ef508c4..c5d4d59adbc35 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -1,16 +1,10 @@ from datetime import datetime, timedelta from io import StringIO -import re -import sys -import textwrap import warnings import numpy as np import pytest -from pandas.compat import PYPY - -import pandas as pd from pandas import ( Categorical, DataFrame, @@ -192,357 +186,6 @@ def test_latex_repr(self): # GH 12182 assert df._repr_latex_() is None - def test_info(self, float_frame, datetime_frame): - io = StringIO() - float_frame.info(buf=io) - datetime_frame.info(buf=io) - - frame = DataFrame(np.random.randn(5, 3)) - - frame.info() - frame.info(verbose=False) - - def test_info_verbose(self): - buf = StringIO() - size = 1001 - start = 5 - frame = DataFrame(np.random.randn(3, size)) - frame.info(verbose=True, buf=buf) - - res = buf.getvalue() - header = " # Column Dtype \n--- ------ ----- " - assert header in res - - frame.info(verbose=True, buf=buf) - buf.seek(0) - lines = buf.readlines() - assert len(lines) > 0 - - for i, line in enumerate(lines): - if i >= start and i < start + size: - line_nr = f" {i - start} " - assert line.startswith(line_nr) - - def test_info_memory(self): - # https://github.com/pandas-dev/pandas/issues/21056 - df = pd.DataFrame({"a": pd.Series([1, 2], dtype="i8")}) - buf = StringIO() - df.info(buf=buf) - result = buf.getvalue() - bytes = float(df.memory_usage().sum()) - - expected = textwrap.dedent( - f"""\ - - RangeIndex: 2 entries, 0 to 1 - Data columns (total 1 columns): - # Column Non-Null Count Dtype - --- ------ -------------- ----- - 0 a 2 non-null int64 - dtypes: int64(1) - memory usage: {bytes} bytes - """ - ) - - assert result == expected - - def test_info_wide(self): - from pandas import set_option, reset_option - - io = StringIO() - df = DataFrame(np.random.randn(5, 101)) - df.info(buf=io) - - io = StringIO() - df.info(buf=io, max_cols=101) - rs = io.getvalue() - assert len(rs.splitlines()) > 100 - xp = rs - - set_option("display.max_info_columns", 101) - io = StringIO() - df.info(buf=io) - assert rs == xp - reset_option("display.max_info_columns") - - def test_info_duplicate_columns(self): - io = StringIO() - - # it works! 
- frame = DataFrame(np.random.randn(1500, 4), columns=["a", "a", "b", "b"]) - frame.info(buf=io) - - def test_info_duplicate_columns_shows_correct_dtypes(self): - # GH11761 - io = StringIO() - - frame = DataFrame([[1, 2.0]], columns=["a", "a"]) - frame.info(buf=io) - io.seek(0) - lines = io.readlines() - assert " 0 a 1 non-null int64 \n" == lines[5] - assert " 1 a 1 non-null float64\n" == lines[6] - - def test_info_shows_column_dtypes(self): - dtypes = [ - "int64", - "float64", - "datetime64[ns]", - "timedelta64[ns]", - "complex128", - "object", - "bool", - ] - data = {} - n = 10 - for i, dtype in enumerate(dtypes): - data[i] = np.random.randint(2, size=n).astype(dtype) - df = DataFrame(data) - buf = StringIO() - df.info(buf=buf) - res = buf.getvalue() - header = ( - " # Column Non-Null Count Dtype \n" - "--- ------ -------------- ----- " - ) - assert header in res - for i, dtype in enumerate(dtypes): - name = f" {i:d} {i:d} {n:d} non-null {dtype}" - assert name in res - - def test_info_max_cols(self): - df = DataFrame(np.random.randn(10, 5)) - for len_, verbose in [(5, None), (5, False), (12, True)]: - # For verbose always ^ setting ^ summarize ^ full output - with option_context("max_info_columns", 4): - buf = StringIO() - df.info(buf=buf, verbose=verbose) - res = buf.getvalue() - assert len(res.strip().split("\n")) == len_ - - for len_, verbose in [(12, None), (5, False), (12, True)]: - - # max_cols not exceeded - with option_context("max_info_columns", 5): - buf = StringIO() - df.info(buf=buf, verbose=verbose) - res = buf.getvalue() - assert len(res.strip().split("\n")) == len_ - - for len_, max_cols in [(12, 5), (5, 4)]: - # setting truncates - with option_context("max_info_columns", 4): - buf = StringIO() - df.info(buf=buf, max_cols=max_cols) - res = buf.getvalue() - assert len(res.strip().split("\n")) == len_ - - # setting wouldn't truncate - with option_context("max_info_columns", 5): - buf = StringIO() - df.info(buf=buf, max_cols=max_cols) - res = buf.getvalue() - assert len(res.strip().split("\n")) == len_ - - def test_info_memory_usage(self): - # Ensure memory usage is displayed, when asserted, on the last line - dtypes = [ - "int64", - "float64", - "datetime64[ns]", - "timedelta64[ns]", - "complex128", - "object", - "bool", - ] - data = {} - n = 10 - for i, dtype in enumerate(dtypes): - data[i] = np.random.randint(2, size=n).astype(dtype) - df = DataFrame(data) - buf = StringIO() - - # display memory usage case - df.info(buf=buf, memory_usage=True) - res = buf.getvalue().splitlines() - assert "memory usage: " in res[-1] - - # do not display memory usage case - df.info(buf=buf, memory_usage=False) - res = buf.getvalue().splitlines() - assert "memory usage: " not in res[-1] - - df.info(buf=buf, memory_usage=True) - res = buf.getvalue().splitlines() - - # memory usage is a lower bound, so print it as XYZ+ MB - assert re.match(r"memory usage: [^+]+\+", res[-1]) - - df.iloc[:, :5].info(buf=buf, memory_usage=True) - res = buf.getvalue().splitlines() - - # excluded column with object dtype, so estimate is accurate - assert not re.match(r"memory usage: [^+]+\+", res[-1]) - - # Test a DataFrame with duplicate columns - dtypes = ["int64", "int64", "int64", "float64"] - data = {} - n = 100 - for i, dtype in enumerate(dtypes): - data[i] = np.random.randint(2, size=n).astype(dtype) - df = DataFrame(data) - df.columns = dtypes - - df_with_object_index = pd.DataFrame({"a": [1]}, index=["foo"]) - df_with_object_index.info(buf=buf, memory_usage=True) - res = buf.getvalue().splitlines() - assert 
re.match(r"memory usage: [^+]+\+", res[-1]) - - df_with_object_index.info(buf=buf, memory_usage="deep") - res = buf.getvalue().splitlines() - assert re.match(r"memory usage: [^+]+$", res[-1]) - - # Ensure df size is as expected - # (cols * rows * bytes) + index size - df_size = df.memory_usage().sum() - exp_size = len(dtypes) * n * 8 + df.index.nbytes - assert df_size == exp_size - - # Ensure number of cols in memory_usage is the same as df - size_df = np.size(df.columns.values) + 1 # index=True; default - assert size_df == np.size(df.memory_usage()) - - # assert deep works only on object - assert df.memory_usage().sum() == df.memory_usage(deep=True).sum() - - # test for validity - DataFrame(1, index=["a"], columns=["A"]).memory_usage(index=True) - DataFrame(1, index=["a"], columns=["A"]).index.nbytes - df = DataFrame( - data=1, - index=pd.MultiIndex.from_product([["a"], range(1000)]), - columns=["A"], - ) - df.index.nbytes - df.memory_usage(index=True) - df.index.values.nbytes - - mem = df.memory_usage(deep=True).sum() - assert mem > 0 - - @pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result") - def test_info_memory_usage_deep_not_pypy(self): - df_with_object_index = pd.DataFrame({"a": [1]}, index=["foo"]) - assert ( - df_with_object_index.memory_usage(index=True, deep=True).sum() - > df_with_object_index.memory_usage(index=True).sum() - ) - - df_object = pd.DataFrame({"a": ["a"]}) - assert df_object.memory_usage(deep=True).sum() > df_object.memory_usage().sum() - - @pytest.mark.skipif(not PYPY, reason="on PyPy deep=True does not change result") - def test_info_memory_usage_deep_pypy(self): - df_with_object_index = pd.DataFrame({"a": [1]}, index=["foo"]) - assert ( - df_with_object_index.memory_usage(index=True, deep=True).sum() - == df_with_object_index.memory_usage(index=True).sum() - ) - - df_object = pd.DataFrame({"a": ["a"]}) - assert df_object.memory_usage(deep=True).sum() == df_object.memory_usage().sum() - - @pytest.mark.skipif(PYPY, reason="PyPy getsizeof() fails by design") - def test_usage_via_getsizeof(self): - df = DataFrame( - data=1, - index=pd.MultiIndex.from_product([["a"], range(1000)]), - columns=["A"], - ) - mem = df.memory_usage(deep=True).sum() - # sys.getsizeof will call the .memory_usage with - # deep=True, and add on some GC overhead - diff = mem - sys.getsizeof(df) - assert abs(diff) < 100 - - def test_info_memory_usage_qualified(self): - - buf = StringIO() - df = DataFrame(1, columns=list("ab"), index=[1, 2, 3]) - df.info(buf=buf) - assert "+" not in buf.getvalue() - - buf = StringIO() - df = DataFrame(1, columns=list("ab"), index=list("ABC")) - df.info(buf=buf) - assert "+" in buf.getvalue() - - buf = StringIO() - df = DataFrame( - 1, - columns=list("ab"), - index=pd.MultiIndex.from_product([range(3), range(3)]), - ) - df.info(buf=buf) - assert "+" not in buf.getvalue() - - buf = StringIO() - df = DataFrame( - 1, - columns=list("ab"), - index=pd.MultiIndex.from_product([range(3), ["foo", "bar"]]), - ) - df.info(buf=buf) - assert "+" in buf.getvalue() - - def test_info_memory_usage_bug_on_multiindex(self): - # GH 14308 - # memory usage introspection should not materialize .values - - from string import ascii_uppercase as uppercase - - def memory_usage(f): - return f.memory_usage(deep=True).sum() - - N = 100 - M = len(uppercase) - index = pd.MultiIndex.from_product( - [list(uppercase), pd.date_range("20160101", periods=N)], - names=["id", "date"], - ) - df = DataFrame({"value": np.random.randn(N * M)}, index=index) - - unstacked = 
df.unstack("id") - assert df.values.nbytes == unstacked.values.nbytes - assert memory_usage(df) > memory_usage(unstacked) - - # high upper bound - assert memory_usage(unstacked) - memory_usage(df) < 2000 - - def test_info_categorical(self): - # GH14298 - idx = pd.CategoricalIndex(["a", "b"]) - df = pd.DataFrame(np.zeros((2, 2)), index=idx, columns=idx) - - buf = StringIO() - df.info(buf=buf) - - def test_info_categorical_column(self): - - # make sure it works - n = 2500 - df = DataFrame({"int64": np.random.randint(100, size=n)}) - df["category"] = Series( - np.array(list("abcdefghij")).take(np.random.randint(0, 10, size=n)) - ).astype("category") - df.isna() - buf = StringIO() - df.info(buf=buf) - - df2 = df[df["category"] == "d"] - buf = StringIO() - df2.info(buf=buf) - def test_repr_categorical_dates_periods(self): # normal DataFrame dt = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") diff --git a/pandas/tests/io/formats/test_info.py b/pandas/tests/io/formats/test_info.py new file mode 100644 index 0000000000000..877bd1650ae60 --- /dev/null +++ b/pandas/tests/io/formats/test_info.py @@ -0,0 +1,405 @@ +from io import StringIO +import re +from string import ascii_uppercase as uppercase +import sys +import textwrap + +import numpy as np +import pytest + +from pandas.compat import PYPY + +from pandas import ( + CategoricalIndex, + DataFrame, + MultiIndex, + Series, + date_range, + option_context, + reset_option, + set_option, +) +import pandas._testing as tm + + +@pytest.fixture +def datetime_frame(): + """ + Fixture for DataFrame of floats with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D'] + + A B C D + 2000-01-03 -1.122153 0.468535 0.122226 1.693711 + 2000-01-04 0.189378 0.486100 0.007864 -1.216052 + 2000-01-05 0.041401 -0.835752 -0.035279 -0.414357 + 2000-01-06 0.430050 0.894352 0.090719 0.036939 + 2000-01-07 -0.620982 -0.668211 -0.706153 1.466335 + 2000-01-10 -0.752633 0.328434 -0.815325 0.699674 + 2000-01-11 -2.236969 0.615737 -0.829076 -1.196106 + ... ... ... ... ... 
+ 2000-02-03 1.642618 -0.579288 0.046005 1.385249 + 2000-02-04 -0.544873 -1.160962 -0.284071 -1.418351 + 2000-02-07 -2.656149 -0.601387 1.410148 0.444150 + 2000-02-08 -1.201881 -1.289040 0.772992 -1.445300 + 2000-02-09 1.377373 0.398619 1.008453 -0.928207 + 2000-02-10 0.473194 -0.636677 0.984058 0.511519 + 2000-02-11 -0.965556 0.408313 -1.312844 -0.381948 + + [30 rows x 4 columns] + """ + return DataFrame(tm.getTimeSeriesData()) + + +def test_info_categorical_column(): + + # make sure it works + n = 2500 + df = DataFrame({"int64": np.random.randint(100, size=n)}) + df["category"] = Series( + np.array(list("abcdefghij")).take(np.random.randint(0, 10, size=n)) + ).astype("category") + df.isna() + buf = StringIO() + df.info(buf=buf) + + df2 = df[df["category"] == "d"] + buf = StringIO() + df2.info(buf=buf) + + +def test_info(float_frame, datetime_frame): + io = StringIO() + float_frame.info(buf=io) + datetime_frame.info(buf=io) + + frame = DataFrame(np.random.randn(5, 3)) + + frame.info() + frame.info(verbose=False) + + +def test_info_verbose(): + buf = StringIO() + size = 1001 + start = 5 + frame = DataFrame(np.random.randn(3, size)) + frame.info(verbose=True, buf=buf) + + res = buf.getvalue() + header = " # Column Dtype \n--- ------ ----- " + assert header in res + + frame.info(verbose=True, buf=buf) + buf.seek(0) + lines = buf.readlines() + assert len(lines) > 0 + + for i, line in enumerate(lines): + if i >= start and i < start + size: + line_nr = f" {i - start} " + assert line.startswith(line_nr) + + +def test_info_memory(): + # https://github.com/pandas-dev/pandas/issues/21056 + df = DataFrame({"a": Series([1, 2], dtype="i8")}) + buf = StringIO() + df.info(buf=buf) + result = buf.getvalue() + bytes = float(df.memory_usage().sum()) + expected = textwrap.dedent( + f"""\ + + RangeIndex: 2 entries, 0 to 1 + Data columns (total 1 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 a 2 non-null int64 + dtypes: int64(1) + memory usage: {bytes} bytes + """ + ) + assert result == expected + + +def test_info_wide(): + io = StringIO() + df = DataFrame(np.random.randn(5, 101)) + df.info(buf=io) + + io = StringIO() + df.info(buf=io, max_cols=101) + rs = io.getvalue() + assert len(rs.splitlines()) > 100 + xp = rs + + set_option("display.max_info_columns", 101) + io = StringIO() + df.info(buf=io) + assert rs == xp + reset_option("display.max_info_columns") + + +def test_info_duplicate_columns(): + io = StringIO() + + # it works! 
+ frame = DataFrame(np.random.randn(1500, 4), columns=["a", "a", "b", "b"]) + frame.info(buf=io) + + +def test_info_duplicate_columns_shows_correct_dtypes(): + # GH11761 + io = StringIO() + + frame = DataFrame([[1, 2.0]], columns=["a", "a"]) + frame.info(buf=io) + io.seek(0) + lines = io.readlines() + assert " 0 a 1 non-null int64 \n" == lines[5] + assert " 1 a 1 non-null float64\n" == lines[6] + + +def test_info_shows_column_dtypes(): + dtypes = [ + "int64", + "float64", + "datetime64[ns]", + "timedelta64[ns]", + "complex128", + "object", + "bool", + ] + data = {} + n = 10 + for i, dtype in enumerate(dtypes): + data[i] = np.random.randint(2, size=n).astype(dtype) + df = DataFrame(data) + buf = StringIO() + df.info(buf=buf) + res = buf.getvalue() + header = ( + " # Column Non-Null Count Dtype \n" + "--- ------ -------------- ----- " + ) + assert header in res + for i, dtype in enumerate(dtypes): + name = f" {i:d} {i:d} {n:d} non-null {dtype}" + assert name in res + + +def test_info_max_cols(): + df = DataFrame(np.random.randn(10, 5)) + for len_, verbose in [(5, None), (5, False), (12, True)]: + # For verbose always ^ setting ^ summarize ^ full output + with option_context("max_info_columns", 4): + buf = StringIO() + df.info(buf=buf, verbose=verbose) + res = buf.getvalue() + assert len(res.strip().split("\n")) == len_ + + for len_, verbose in [(12, None), (5, False), (12, True)]: + + # max_cols not exceeded + with option_context("max_info_columns", 5): + buf = StringIO() + df.info(buf=buf, verbose=verbose) + res = buf.getvalue() + assert len(res.strip().split("\n")) == len_ + + for len_, max_cols in [(12, 5), (5, 4)]: + # setting truncates + with option_context("max_info_columns", 4): + buf = StringIO() + df.info(buf=buf, max_cols=max_cols) + res = buf.getvalue() + assert len(res.strip().split("\n")) == len_ + + # setting wouldn't truncate + with option_context("max_info_columns", 5): + buf = StringIO() + df.info(buf=buf, max_cols=max_cols) + res = buf.getvalue() + assert len(res.strip().split("\n")) == len_ + + +def test_info_memory_usage(): + # Ensure memory usage is displayed, when asserted, on the last line + dtypes = [ + "int64", + "float64", + "datetime64[ns]", + "timedelta64[ns]", + "complex128", + "object", + "bool", + ] + data = {} + n = 10 + for i, dtype in enumerate(dtypes): + data[i] = np.random.randint(2, size=n).astype(dtype) + df = DataFrame(data) + buf = StringIO() + + # display memory usage case + df.info(buf=buf, memory_usage=True) + res = buf.getvalue().splitlines() + assert "memory usage: " in res[-1] + + # do not display memory usage case + df.info(buf=buf, memory_usage=False) + res = buf.getvalue().splitlines() + assert "memory usage: " not in res[-1] + + df.info(buf=buf, memory_usage=True) + res = buf.getvalue().splitlines() + + # memory usage is a lower bound, so print it as XYZ+ MB + assert re.match(r"memory usage: [^+]+\+", res[-1]) + + df.iloc[:, :5].info(buf=buf, memory_usage=True) + res = buf.getvalue().splitlines() + + # excluded column with object dtype, so estimate is accurate + assert not re.match(r"memory usage: [^+]+\+", res[-1]) + + # Test a DataFrame with duplicate columns + dtypes = ["int64", "int64", "int64", "float64"] + data = {} + n = 100 + for i, dtype in enumerate(dtypes): + data[i] = np.random.randint(2, size=n).astype(dtype) + df = DataFrame(data) + df.columns = dtypes + + df_with_object_index = DataFrame({"a": [1]}, index=["foo"]) + df_with_object_index.info(buf=buf, memory_usage=True) + res = buf.getvalue().splitlines() + assert 
re.match(r"memory usage: [^+]+\+", res[-1]) + + df_with_object_index.info(buf=buf, memory_usage="deep") + res = buf.getvalue().splitlines() + assert re.match(r"memory usage: [^+]+$", res[-1]) + + # Ensure df size is as expected + # (cols * rows * bytes) + index size + df_size = df.memory_usage().sum() + exp_size = len(dtypes) * n * 8 + df.index.nbytes + assert df_size == exp_size + + # Ensure number of cols in memory_usage is the same as df + size_df = np.size(df.columns.values) + 1 # index=True; default + assert size_df == np.size(df.memory_usage()) + + # assert deep works only on object + assert df.memory_usage().sum() == df.memory_usage(deep=True).sum() + + # test for validity + DataFrame(1, index=["a"], columns=["A"]).memory_usage(index=True) + DataFrame(1, index=["a"], columns=["A"]).index.nbytes + df = DataFrame( + data=1, index=MultiIndex.from_product([["a"], range(1000)]), columns=["A"], + ) + df.index.nbytes + df.memory_usage(index=True) + df.index.values.nbytes + + mem = df.memory_usage(deep=True).sum() + assert mem > 0 + + +@pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result") +def test_info_memory_usage_deep_not_pypy(): + df_with_object_index = DataFrame({"a": [1]}, index=["foo"]) + assert ( + df_with_object_index.memory_usage(index=True, deep=True).sum() + > df_with_object_index.memory_usage(index=True).sum() + ) + + df_object = DataFrame({"a": ["a"]}) + assert df_object.memory_usage(deep=True).sum() > df_object.memory_usage().sum() + + +@pytest.mark.skipif(not PYPY, reason="on PyPy deep=True does not change result") +def test_info_memory_usage_deep_pypy(): + df_with_object_index = DataFrame({"a": [1]}, index=["foo"]) + assert ( + df_with_object_index.memory_usage(index=True, deep=True).sum() + == df_with_object_index.memory_usage(index=True).sum() + ) + + df_object = DataFrame({"a": ["a"]}) + assert df_object.memory_usage(deep=True).sum() == df_object.memory_usage().sum() + + +@pytest.mark.skipif(PYPY, reason="PyPy getsizeof() fails by design") +def test_usage_via_getsizeof(): + df = DataFrame( + data=1, index=MultiIndex.from_product([["a"], range(1000)]), columns=["A"], + ) + mem = df.memory_usage(deep=True).sum() + # sys.getsizeof will call the .memory_usage with + # deep=True, and add on some GC overhead + diff = mem - sys.getsizeof(df) + assert abs(diff) < 100 + + +def test_info_memory_usage_qualified(): + + buf = StringIO() + df = DataFrame(1, columns=list("ab"), index=[1, 2, 3]) + df.info(buf=buf) + assert "+" not in buf.getvalue() + + buf = StringIO() + df = DataFrame(1, columns=list("ab"), index=list("ABC")) + df.info(buf=buf) + assert "+" in buf.getvalue() + + buf = StringIO() + df = DataFrame( + 1, columns=list("ab"), index=MultiIndex.from_product([range(3), range(3)]), + ) + df.info(buf=buf) + assert "+" not in buf.getvalue() + + buf = StringIO() + df = DataFrame( + 1, + columns=list("ab"), + index=MultiIndex.from_product([range(3), ["foo", "bar"]]), + ) + df.info(buf=buf) + assert "+" in buf.getvalue() + + +def test_info_memory_usage_bug_on_multiindex(): + # GH 14308 + # memory usage introspection should not materialize .values + + def memory_usage(f): + return f.memory_usage(deep=True).sum() + + N = 100 + M = len(uppercase) + index = MultiIndex.from_product( + [list(uppercase), date_range("20160101", periods=N)], names=["id", "date"], + ) + df = DataFrame({"value": np.random.randn(N * M)}, index=index) + + unstacked = df.unstack("id") + assert df.values.nbytes == unstacked.values.nbytes + assert memory_usage(df) > memory_usage(unstacked) + + 
# high upper bound + assert memory_usage(unstacked) - memory_usage(df) < 2000 + + +def test_info_categorical(): + # GH14298 + idx = CategoricalIndex(["a", "b"]) + df = DataFrame(np.zeros((2, 2)), index=idx, columns=idx) + + buf = StringIO() + df.info(buf=buf) From 0c107bdf930780b8adca31f64d8c4648dd2cc1d4 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Tue, 18 Feb 2020 19:30:08 -0600 Subject: [PATCH 070/388] DOC: Update sort_index docs (#31898) --- pandas/core/frame.py | 5 +++-- pandas/core/series.py | 15 ++++++++------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e4f36e128059b..9fe1ec7b792c8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4732,8 +4732,9 @@ def sort_index( and 1 identifies the columns. level : int or level name or list of ints or list of level names If not None, sort on values in specified index level(s). - ascending : bool, default True - Sort ascending vs. descending. + ascending : bool or list of bools, default True + Sort ascending vs. descending. When the index is a MultiIndex the + sort direction can be controlled for each level individually. inplace : bool, default False If True, perform operation in-place. kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort' diff --git a/pandas/core/series.py b/pandas/core/series.py index 15fe0bb98b536..9c0ff9780da3e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2950,11 +2950,11 @@ def sort_index( self, axis=0, level=None, - ascending=True, - inplace=False, - kind="quicksort", - na_position="last", - sort_remaining=True, + ascending: bool = True, + inplace: bool = False, + kind: str = "quicksort", + na_position: str = "last", + sort_remaining: bool = True, ignore_index: bool = False, ): """ @@ -2969,8 +2969,9 @@ def sort_index( Axis to direct sorting. This can only be 0 for Series. level : int, optional If not None, sort on values in specified index level(s). - ascending : bool, default true - Sort ascending vs. descending. + ascending : bool or list of bools, default True + Sort ascending vs. descending. When the index is a MultiIndex the + sort direction can be controlled for each level individually. inplace : bool, default False If True, perform operation in-place. kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort' From 3cb81ea69ae4b3595ebf975ddbabc80ec8b24ce2 Mon Sep 17 00:00:00 2001 From: RaisaDZ <34237447+RaisaDZ@users.noreply.github.com> Date: Wed, 19 Feb 2020 01:46:25 +0000 Subject: [PATCH 071/388] DOC: Mention black and PEP8 in pandas style guide (#32043) --- doc/source/development/code_style.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/source/development/code_style.rst b/doc/source/development/code_style.rst index bcddc033a61f5..17f8783f71bfb 100644 --- a/doc/source/development/code_style.rst +++ b/doc/source/development/code_style.rst @@ -9,6 +9,12 @@ pandas code style guide .. contents:: Table of contents: :local: +*pandas* follows the `PEP8 `_ +standard and uses `Black `_ +and `Flake8 `_ to ensure a +consistent code format throughout the project. For details see the +:ref:`contributing guide to pandas`. 
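The paragraph added above names the tools but does not show what they do. As a
rough, hypothetical illustration (the function is invented for this sketch and
is not part of the pandas codebase), Black normalizes spacing and layout into
one canonical style, and Flake8 then reports any remaining PEP8 violations:

    # Hand-written input with inconsistent spacing, as a contributor
    # might submit it:
    #
    #     def frame_stats( df,cols ):
    #         return { c:df[ c ].mean( ) for c in cols }
    #
    # The same code after running ``black``; ``flake8`` passes cleanly:
    def frame_stats(df, cols):
        return {c: df[c].mean() for c in cols}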
+ Patterns ======== From ac3056f2f13e5287dc66d09f31522f7fba592fec Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 19 Feb 2020 01:47:38 +0000 Subject: [PATCH 072/388] REGR: show_versions (#32041) --- pandas/tests/test_optional_dependency.py | 8 ++++---- pandas/tests/util/test_show_versions.py | 22 ++++++++++++++++++++++ pandas/util/_print_versions.py | 4 ++-- 3 files changed, 28 insertions(+), 6 deletions(-) create mode 100644 pandas/tests/util/test_show_versions.py diff --git a/pandas/tests/test_optional_dependency.py b/pandas/tests/test_optional_dependency.py index ce527214e55e7..e5ed69b7703b1 100644 --- a/pandas/tests/test_optional_dependency.py +++ b/pandas/tests/test_optional_dependency.py @@ -22,12 +22,12 @@ def test_xlrd_version_fallback(): import_optional_dependency("xlrd") -def test_bad_version(): +def test_bad_version(monkeypatch): name = "fakemodule" module = types.ModuleType(name) module.__version__ = "0.9.0" sys.modules[name] = module - VERSIONS[name] = "1.0.0" + monkeypatch.setitem(VERSIONS, name, "1.0.0") match = "Pandas requires .*1.0.0.* of .fakemodule.*'0.9.0'" with pytest.raises(ImportError, match=match): @@ -42,11 +42,11 @@ def test_bad_version(): assert result is module -def test_no_version_raises(): +def test_no_version_raises(monkeypatch): name = "fakemodule" module = types.ModuleType(name) sys.modules[name] = module - VERSIONS[name] = "1.0.0" + monkeypatch.setitem(VERSIONS, name, "1.0.0") with pytest.raises(ImportError, match="Can't determine .* fakemodule"): import_optional_dependency(name) diff --git a/pandas/tests/util/test_show_versions.py b/pandas/tests/util/test_show_versions.py new file mode 100644 index 0000000000000..0d2c81c4ea6c7 --- /dev/null +++ b/pandas/tests/util/test_show_versions.py @@ -0,0 +1,22 @@ +import re + +import pandas as pd + + +def test_show_versions(capsys): + # gh-32041 + pd.show_versions() + captured = capsys.readouterr() + result = captured.out + + # check header + assert "INSTALLED VERSIONS" in result + + # check full commit hash + assert re.search(r"commit\s*:\s[0-9a-f]{40}\n", result) + + # check required dependency + assert re.search(r"numpy\s*:\s([0-9\.\+a-f]|dev)+\n", result) + + # check optional dependency + assert re.search(r"pyarrow\s*:\s([0-9\.]+|None)\n", result) diff --git a/pandas/util/_print_versions.py b/pandas/util/_print_versions.py index fdfa436ce6536..99b2b9e9f5f6e 100644 --- a/pandas/util/_print_versions.py +++ b/pandas/util/_print_versions.py @@ -118,10 +118,10 @@ def show_versions(as_json=False): print("\nINSTALLED VERSIONS") print("------------------") for k, stat in sys_info: - print(f"{{k:<{maxlen}}}: {{stat}}") + print(f"{k:<{maxlen}}: {stat}") print("") for k, stat in deps_blob: - print(f"{{k:<{maxlen}}}: {{stat}}") + print(f"{k:<{maxlen}}: {stat}") def main() -> int: From 02ac975e8c65109f6f688146f3f1e944162613be Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 19 Feb 2020 22:29:57 +0000 Subject: [PATCH 073/388] TYP: check_untyped_defs core.arrays.categorical (#32097) --- pandas/core/arrays/categorical.py | 11 ++++------- setup.cfg | 3 --- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d469b574820f9..a5048e3aae899 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -341,10 +341,7 @@ def __init__( values = _convert_to_list_like(values) # By convention, empty lists result in object dtype: - if len(values) == 0: - sanitize_dtype = "object" - else: - sanitize_dtype = None 
+ sanitize_dtype = "object" if len(values) == 0 else None null_mask = isna(values) if null_mask.any(): values = [values[idx] for idx in np.where(~null_mask)[0]] @@ -1496,7 +1493,7 @@ def check_for_ordered(self, op): def _values_for_argsort(self): return self._codes.copy() - def argsort(self, ascending=True, kind="quicksort", *args, **kwargs): + def argsort(self, ascending=True, kind="quicksort", **kwargs): """ Return the indices that would sort the Categorical. @@ -1511,7 +1508,7 @@ def argsort(self, ascending=True, kind="quicksort", *args, **kwargs): or descending sort. kind : {'quicksort', 'mergesort', 'heapsort'}, optional Sorting algorithm. - *args, **kwargs: + **kwargs: passed through to :func:`numpy.argsort`. Returns @@ -1547,7 +1544,7 @@ def argsort(self, ascending=True, kind="quicksort", *args, **kwargs): >>> cat.argsort() array([2, 0, 1]) """ - return super().argsort(ascending=ascending, kind=kind, *args, **kwargs) + return super().argsort(ascending=ascending, kind=kind, **kwargs) def sort_values(self, inplace=False, ascending=True, na_position="last"): """ diff --git a/setup.cfg b/setup.cfg index 4a900e581c353..4b0b32b09ca8d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -147,9 +147,6 @@ check_untyped_defs=False [mypy-pandas._version] check_untyped_defs=False -[mypy-pandas.core.arrays.categorical] -check_untyped_defs=False - [mypy-pandas.core.arrays.interval] check_untyped_defs=False From 30bb0f01286751156d755755dfb920fc981baef3 Mon Sep 17 00:00:00 2001 From: RaisaDZ <34237447+RaisaDZ@users.noreply.github.com> Date: Wed, 19 Feb 2020 23:19:48 +0000 Subject: [PATCH 074/388] add messages to tests (#31852) * add messages to tests * changes to test_boolean.py * split error messages * change to test_isin.py * changes to test_boolean.py and test_indexing.py * revert changes to test_diff.py * cleanups to the code * changes to test_boolean.py and test_replace.py * change error message in test_to_dict.py --- .../tests/extension/decimal/test_decimal.py | 3 +- pandas/tests/extension/json/test_json.py | 5 +- pandas/tests/extension/test_boolean.py | 4 +- pandas/tests/extension/test_categorical.py | 3 +- .../tests/frame/indexing/test_categorical.py | 47 ++++++++++--------- pandas/tests/frame/indexing/test_indexing.py | 18 ++++--- pandas/tests/frame/indexing/test_where.py | 12 +++-- pandas/tests/frame/methods/test_explode.py | 4 +- pandas/tests/frame/methods/test_isin.py | 15 ++++-- pandas/tests/frame/methods/test_quantile.py | 3 +- pandas/tests/frame/methods/test_round.py | 23 +++++---- .../tests/frame/methods/test_sort_values.py | 2 +- pandas/tests/frame/methods/test_to_dict.py | 8 +++- 13 files changed, 90 insertions(+), 57 deletions(-) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index a78e4bb34e42a..f4ffcb8d0f109 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -148,7 +148,8 @@ class Reduce: def check_reduce(self, s, op_name, skipna): if op_name in ["median", "skew", "kurt"]: - with pytest.raises(NotImplementedError): + msg = r"decimal does not support the .* operation" + with pytest.raises(NotImplementedError, match=msg): getattr(s, op_name)(skipna=skipna) else: diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index f7ca99be2adea..d086896fb09c3 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -136,10 +136,11 @@ def test_custom_asserts(self): 
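The hunk opened above is the first of many in this patch that replace a bare
``pytest.raises`` with an explicit expected message. The mechanics, shown in a
minimal self-contained sketch (the ``divide`` helper is hypothetical and exists
only for this example): pytest treats ``match`` as a regular expression and
applies it with ``re.search`` to the string form of the raised exception, which
is why messages containing literal parentheses or dots are escaped throughout
the diffs below.

    import re

    import pytest

    def divide(a, b):
        # Hypothetical helper used only to demonstrate the pattern.
        if b == 0:
            raise ValueError("cannot divide by zero (b == 0)")
        return a / b

    def test_divide_raises():
        # re.escape turns the literal parentheses into a valid pattern,
        # mirroring the hand-escaped messages in this patch.
        msg = re.escape("cannot divide by zero (b == 0)")
        with pytest.raises(ValueError, match=msg):
            divide(1, 0)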
self.assert_frame_equal(a.to_frame(), a.to_frame()) b = pd.Series(data.take([0, 0, 1])) - with pytest.raises(AssertionError): + msg = r"ExtensionArray are different" + with pytest.raises(AssertionError, match=msg): self.assert_series_equal(a, b) - with pytest.raises(AssertionError): + with pytest.raises(AssertionError, match=msg): self.assert_frame_equal(a.to_frame(), b.to_frame()) diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index 0c6b187eac1fc..e2331b69916fb 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -112,9 +112,9 @@ def _check_op(self, s, op, other, op_name, exc=NotImplementedError): # subtraction for bools raises TypeError (but not yet in 1.13) if _np_version_under1p14: pytest.skip("__sub__ does not yet raise in numpy 1.13") - with pytest.raises(TypeError): + msg = r"numpy boolean subtract" + with pytest.raises(TypeError, match=msg): op(s, other) - return result = op(s, other) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 336b23e54d74c..69a97f5c9fe02 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -278,7 +278,8 @@ def _compare_other(self, s, data, op_name, other): assert (result == expected).all() else: - with pytest.raises(TypeError): + msg = "Unordered Categoricals can only compare equality or not" + with pytest.raises(TypeError, match=msg): op(data, other) diff --git a/pandas/tests/frame/indexing/test_categorical.py b/pandas/tests/frame/indexing/test_categorical.py index 3a472a8b58b6c..f5b3f980cc534 100644 --- a/pandas/tests/frame/indexing/test_categorical.py +++ b/pandas/tests/frame/indexing/test_categorical.py @@ -115,7 +115,12 @@ def test_assigning_ops(self): tm.assert_frame_equal(df, exp_single_cats_value) # - assign a single value not in the current categories set - with pytest.raises(ValueError): + msg1 = ( + "Cannot setitem on a Categorical with a new category, " + "set the categories first" + ) + msg2 = "Cannot set a Categorical with another, without identical categories" + with pytest.raises(ValueError, match=msg1): df = orig.copy() df.iloc[2, 0] = "c" @@ -125,7 +130,7 @@ def test_assigning_ops(self): tm.assert_frame_equal(df, exp_single_row) # - assign a complete row (mixed values) not in categories set - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg1): df = orig.copy() df.iloc[2, :] = ["c", 2] @@ -134,7 +139,7 @@ def test_assigning_ops(self): df.iloc[2:4, :] = [["b", 2], ["b", 2]] tm.assert_frame_equal(df, exp_multi_row) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg1): df = orig.copy() df.iloc[2:4, :] = [["c", 2], ["c", 2]] @@ -144,12 +149,12 @@ def test_assigning_ops(self): df.iloc[2:4, 0] = Categorical(["b", "b"], categories=["a", "b"]) tm.assert_frame_equal(df, exp_parts_cats_col) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg2): # different categories -> not sure if this should fail or pass df = orig.copy() df.iloc[2:4, 0] = Categorical(list("bb"), categories=list("abc")) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg2): # different values df = orig.copy() df.iloc[2:4, 0] = Categorical(list("cc"), categories=list("abc")) @@ -160,7 +165,7 @@ def test_assigning_ops(self): df.iloc[2:4, 0] = ["b", "b"] tm.assert_frame_equal(df, exp_parts_cats_col) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg1): 
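    # Note on the two patterns used throughout this test: ``msg1`` matches
    # the error for assigning a value outside the current categories, while
    # ``msg2`` matches the error for assigning another Categorical whose
    # categories are not identical.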
df.iloc[2:4, 0] = ["c", "c"] # loc @@ -175,7 +180,7 @@ def test_assigning_ops(self): tm.assert_frame_equal(df, exp_single_cats_value) # - assign a single value not in the current categories set - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg1): df = orig.copy() df.loc["j", "cats"] = "c" @@ -185,7 +190,7 @@ def test_assigning_ops(self): tm.assert_frame_equal(df, exp_single_row) # - assign a complete row (mixed values) not in categories set - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg1): df = orig.copy() df.loc["j", :] = ["c", 2] @@ -194,7 +199,7 @@ def test_assigning_ops(self): df.loc["j":"k", :] = [["b", 2], ["b", 2]] tm.assert_frame_equal(df, exp_multi_row) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg1): df = orig.copy() df.loc["j":"k", :] = [["c", 2], ["c", 2]] @@ -204,14 +209,14 @@ def test_assigning_ops(self): df.loc["j":"k", "cats"] = Categorical(["b", "b"], categories=["a", "b"]) tm.assert_frame_equal(df, exp_parts_cats_col) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg2): # different categories -> not sure if this should fail or pass df = orig.copy() df.loc["j":"k", "cats"] = Categorical( ["b", "b"], categories=["a", "b", "c"] ) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg2): # different values df = orig.copy() df.loc["j":"k", "cats"] = Categorical( @@ -224,7 +229,7 @@ def test_assigning_ops(self): df.loc["j":"k", "cats"] = ["b", "b"] tm.assert_frame_equal(df, exp_parts_cats_col) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg1): df.loc["j":"k", "cats"] = ["c", "c"] # loc @@ -239,7 +244,7 @@ def test_assigning_ops(self): tm.assert_frame_equal(df, exp_single_cats_value) # - assign a single value not in the current categories set - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg1): df = orig.copy() df.loc["j", df.columns[0]] = "c" @@ -249,7 +254,7 @@ def test_assigning_ops(self): tm.assert_frame_equal(df, exp_single_row) # - assign a complete row (mixed values) not in categories set - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg1): df = orig.copy() df.loc["j", :] = ["c", 2] @@ -258,7 +263,7 @@ def test_assigning_ops(self): df.loc["j":"k", :] = [["b", 2], ["b", 2]] tm.assert_frame_equal(df, exp_multi_row) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg1): df = orig.copy() df.loc["j":"k", :] = [["c", 2], ["c", 2]] @@ -268,14 +273,14 @@ def test_assigning_ops(self): df.loc["j":"k", df.columns[0]] = Categorical(["b", "b"], categories=["a", "b"]) tm.assert_frame_equal(df, exp_parts_cats_col) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg2): # different categories -> not sure if this should fail or pass df = orig.copy() df.loc["j":"k", df.columns[0]] = Categorical( ["b", "b"], categories=["a", "b", "c"] ) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg2): # different values df = orig.copy() df.loc["j":"k", df.columns[0]] = Categorical( @@ -288,7 +293,7 @@ def test_assigning_ops(self): df.loc["j":"k", df.columns[0]] = ["b", "b"] tm.assert_frame_equal(df, exp_parts_cats_col) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg1): df.loc["j":"k", df.columns[0]] = ["c", "c"] # iat @@ -297,7 +302,7 @@ def test_assigning_ops(self): tm.assert_frame_equal(df, exp_single_cats_value) # - assign a single value not in the current categories set - with 
pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg1): df = orig.copy() df.iat[2, 0] = "c" @@ -308,7 +313,7 @@ def test_assigning_ops(self): tm.assert_frame_equal(df, exp_single_cats_value) # - assign a single value not in the current categories set - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg1): df = orig.copy() df.at["j", "cats"] = "c" @@ -332,7 +337,7 @@ def test_assigning_ops(self): df.at["j", "cats"] = "b" tm.assert_frame_equal(df, exp_single_cats_value) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg1): df = orig.copy() df.at["j", "cats"] = "c" diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index d892e3d637772..fcf0a41e0f74e 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -481,7 +481,8 @@ def test_setitem(self, float_frame): # so raise/warn smaller = float_frame[:2] - with pytest.raises(com.SettingWithCopyError): + msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): smaller["col10"] = ["1", "2"] assert smaller["col10"].dtype == np.object_ @@ -865,7 +866,8 @@ def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame): # setting it triggers setting with copy sliced = float_frame.iloc[:, -3:] - with pytest.raises(com.SettingWithCopyError): + msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): sliced["C"] = 4.0 assert (float_frame["C"] == 4).all() @@ -992,7 +994,7 @@ def test_getitem_setitem_fancy_exceptions(self, float_frame): with pytest.raises(IndexingError, match="Too many indexers"): ix[:, :, :] - with pytest.raises(IndexingError): + with pytest.raises(IndexingError, match="Too many indexers"): ix[:, :, :] = 1 def test_getitem_setitem_boolean_misaligned(self, float_frame): @@ -1071,10 +1073,10 @@ def test_getitem_setitem_float_labels(self): cp = df.copy() - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=msg): cp.iloc[1.0:5] = 0 - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=msg): result = cp.iloc[1.0:5] == 0 # noqa assert result.values.all() @@ -1470,7 +1472,8 @@ def test_iloc_row(self): # verify slice is view # setting it makes it raise/warn - with pytest.raises(com.SettingWithCopyError): + msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): result[2] = 0.0 exp_col = df[2].copy() @@ -1501,7 +1504,8 @@ def test_iloc_col(self): # verify slice is view # and that we are setting a copy - with pytest.raises(com.SettingWithCopyError): + msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): result[8] = 0.0 assert (df[8] == 0).all() diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 507b2e9cd237b..eee754a47fb8c 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -50,7 +50,8 @@ def _check_get(df, cond, check_dtypes=True): # check getting df = where_frame if df is float_string_frame: - with pytest.raises(TypeError): + msg = "'>' not supported between instances of 'str' and 'int'" + with pytest.raises(TypeError, match=msg): df > 0 return cond = df > 0 @@ -114,7 +115,8 @@ def _check_align(df, cond, 
other, check_dtypes=True): df = where_frame if df is float_string_frame: - with pytest.raises(TypeError): + msg = "'>' not supported between instances of 'str' and 'int'" + with pytest.raises(TypeError, match=msg): df > 0 return @@ -172,7 +174,8 @@ def _check_set(df, cond, check_dtypes=True): df = where_frame if df is float_string_frame: - with pytest.raises(TypeError): + msg = "'>' not supported between instances of 'str' and 'int'" + with pytest.raises(TypeError, match=msg): df > 0 return @@ -358,7 +361,8 @@ def test_where_datetime(self): ) stamp = datetime(2013, 1, 3) - with pytest.raises(TypeError): + msg = "'>' not supported between instances of 'float' and 'datetime.datetime'" + with pytest.raises(TypeError, match=msg): df > stamp result = df[df.iloc[:, :-1] > stamp] diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py index 76c87ed355492..bad8349ec977b 100644 --- a/pandas/tests/frame/methods/test_explode.py +++ b/pandas/tests/frame/methods/test_explode.py @@ -9,11 +9,11 @@ def test_error(): df = pd.DataFrame( {"A": pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")), "B": 1} ) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="column must be a scalar"): df.explode(list("AA")) df.columns = list("AA") - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="columns must be unique"): df.explode("A") diff --git a/pandas/tests/frame/methods/test_isin.py b/pandas/tests/frame/methods/test_isin.py index 0eb94afc99d94..6307738021f68 100644 --- a/pandas/tests/frame/methods/test_isin.py +++ b/pandas/tests/frame/methods/test_isin.py @@ -60,10 +60,14 @@ def test_isin_with_string_scalar(self): }, index=["foo", "bar", "baz", "qux"], ) - with pytest.raises(TypeError): + msg = ( + r"only list-like or dict-like objects are allowed " + r"to be passed to DataFrame.isin\(\), you passed a 'str'" + ) + with pytest.raises(TypeError, match=msg): df.isin("a") - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=msg): df.isin("aaa") def test_isin_df(self): @@ -92,7 +96,8 @@ def test_isin_df_dupe_values(self): df1 = DataFrame({"A": [1, 2, 3, 4], "B": [2, np.nan, 4, 4]}) # just cols duped df2 = DataFrame([[0, 2], [12, 4], [2, np.nan], [4, 5]], columns=["B", "B"]) - with pytest.raises(ValueError): + msg = r"cannot compute isin with a duplicate axis\." 
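    # DataFrame.isin aligns a DataFrame argument on both axes before
    # comparing, so a duplicated index or duplicated columns would make that
    # alignment ambiguous; the ValueError checked in the cases below guards
    # against this.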
+ with pytest.raises(ValueError, match=msg): df1.isin(df2) # just index duped @@ -101,12 +106,12 @@ def test_isin_df_dupe_values(self): columns=["A", "B"], index=[0, 0, 1, 1], ) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): df1.isin(df2) # cols and index: df2.columns = ["B", "B"] - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): df1.isin(df2) def test_isin_dupe_self(self): diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 64461c08d34f4..9c52e8ec5620f 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -75,7 +75,8 @@ def test_quantile_axis_mixed(self): tm.assert_series_equal(result, expected) # must raise - with pytest.raises(TypeError): + msg = "'<' not supported between instances of 'Timestamp' and 'float'" + with pytest.raises(TypeError, match=msg): df.quantile(0.5, axis=1, numeric_only=False) def test_quantile_axis_parameter(self): diff --git a/pandas/tests/frame/methods/test_round.py b/pandas/tests/frame/methods/test_round.py index 0865e03cedc50..6dcdf49e93729 100644 --- a/pandas/tests/frame/methods/test_round.py +++ b/pandas/tests/frame/methods/test_round.py @@ -34,7 +34,8 @@ def test_round(self): # Round with a list round_list = [1, 2] - with pytest.raises(TypeError): + msg = "decimals must be an integer, a dict-like or a Series" + with pytest.raises(TypeError, match=msg): df.round(round_list) # Round with a dictionary @@ -57,34 +58,37 @@ def test_round(self): # float input to `decimals` non_int_round_dict = {"col1": 1, "col2": 0.5} - with pytest.raises(TypeError): + msg = "integer argument expected, got float" + with pytest.raises(TypeError, match=msg): df.round(non_int_round_dict) # String input non_int_round_dict = {"col1": 1, "col2": "foo"} - with pytest.raises(TypeError): + msg = r"an integer is required \(got type str\)" + with pytest.raises(TypeError, match=msg): df.round(non_int_round_dict) non_int_round_Series = Series(non_int_round_dict) - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=msg): df.round(non_int_round_Series) # List input non_int_round_dict = {"col1": 1, "col2": [1, 2]} - with pytest.raises(TypeError): + msg = r"an integer is required \(got type list\)" + with pytest.raises(TypeError, match=msg): df.round(non_int_round_dict) non_int_round_Series = Series(non_int_round_dict) - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=msg): df.round(non_int_round_Series) # Non integer Series inputs non_int_round_Series = Series(non_int_round_dict) - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=msg): df.round(non_int_round_Series) non_int_round_Series = Series(non_int_round_dict) - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=msg): df.round(non_int_round_Series) # Negative numbers @@ -103,7 +107,8 @@ def test_round(self): {"col1": [1.123, 2.123, 3.123], "col2": [1.2, 2.2, 3.2]} ) - with pytest.raises(TypeError): + msg = "integer argument expected, got float" + with pytest.raises(TypeError, match=msg): df.round(nan_round_Series) # Make sure this doesn't break existing Series.round diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index 96f4d6ed90d6b..5a25d1c2c0894 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -458,7 +458,7 @@ def 
test_sort_values_na_position_with_categories_raises(self): } ) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="invalid na_position: bad_position"): df.sort_values(by="c", ascending=False, na_position="bad_position") @pytest.mark.parametrize("inplace", [True, False]) diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index 40393721c4ac6..cd9bd169322fd 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -132,7 +132,13 @@ def test_to_dict(self, mapping): def test_to_dict_errors(self, mapping): # GH#16122 df = DataFrame(np.random.randn(3, 3)) - with pytest.raises(TypeError): + msg = "|".join( + [ + "unsupported type: ", + r"to_dict\(\) only accepts initialized defaultdicts", + ] + ) + with pytest.raises(TypeError, match=msg): df.to_dict(into=mapping) def test_to_dict_not_unique_warning(self): From 74181e1bba8806dbe1f849d376a65d418b3f5215 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Thu, 20 Feb 2020 01:04:10 +0000 Subject: [PATCH 075/388] CLN: Remove unused script find_commits_touching_func.py (#32071) --- scripts/find_commits_touching_func.py | 244 -------------------------- 1 file changed, 244 deletions(-) delete mode 100755 scripts/find_commits_touching_func.py diff --git a/scripts/find_commits_touching_func.py b/scripts/find_commits_touching_func.py deleted file mode 100755 index 85675cb6df42b..0000000000000 --- a/scripts/find_commits_touching_func.py +++ /dev/null @@ -1,244 +0,0 @@ -#!/usr/bin/env python3 -# copyright 2013, y-p @ github -""" -Search the git history for all commits touching a named method - -You need the sh module to run this -WARNING: this script uses git clean -f, running it on a repo with untracked -files will probably erase them. - -Usage:: - $ ./find_commits_touching_func.py (see arguments below) -""" -import argparse -from collections import namedtuple -import logging -import os -import re - -from dateutil.parser import parse - -try: - import sh -except ImportError: - raise ImportError("The 'sh' package is required to run this script.") - - -desc = """ -Find all commits touching a specified function across the codebase. -""".strip() -argparser = argparse.ArgumentParser(description=desc) -argparser.add_argument( - "funcname", - metavar="FUNCNAME", - help="Name of function/method to search for changes on", -) -argparser.add_argument( - "-f", - "--file-masks", - metavar="f_re(,f_re)*", - default=[r"\.py.?$"], - help="comma separated list of regexes to match " - "filenames against\ndefaults all .py? 
files", -) -argparser.add_argument( - "-d", - "--dir-masks", - metavar="d_re(,d_re)*", - default=[], - help="comma separated list of regexes to match base path against", -) -argparser.add_argument( - "-p", - "--path-masks", - metavar="p_re(,p_re)*", - default=[], - help="comma separated list of regexes to match full file path against", -) -argparser.add_argument( - "-y", - "--saw-the-warning", - action="store_true", - default=False, - help="must specify this to run, acknowledge you " - "realize this will erase untracked files", -) -argparser.add_argument( - "--debug-level", - default="CRITICAL", - help="debug level of messages (DEBUG, INFO, etc...)", -) -args = argparser.parse_args() - - -lfmt = logging.Formatter(fmt="%(levelname)-8s %(message)s", datefmt="%m-%d %H:%M:%S") -shh = logging.StreamHandler() -shh.setFormatter(lfmt) -logger = logging.getLogger("findit") -logger.addHandler(shh) - -Hit = namedtuple("Hit", "commit path") -HASH_LEN = 8 - - -def clean_checkout(comm): - h, s, d = get_commit_vitals(comm) - if len(s) > 60: - s = s[:60] + "..." - s = s.split("\n")[0] - logger.info("CO: %s %s" % (comm, s)) - - sh.git("checkout", comm, _tty_out=False) - sh.git("clean", "-f") - - -def get_hits(defname, files=()): - cs = set() - for f in files: - try: - r = sh.git( - "blame", - "-L", - r"/def\s*{start}/,/def/".format(start=defname), - f, - _tty_out=False, - ) - except sh.ErrorReturnCode_128: - logger.debug("no matches in %s" % f) - continue - - lines = r.strip().splitlines()[:-1] - # remove comment lines - lines = [x for x in lines if not re.search(r"^\w+\s*\(.+\)\s*#", x)] - hits = set(map(lambda x: x.split(" ")[0], lines)) - cs.update({Hit(commit=c, path=f) for c in hits}) - - return cs - - -def get_commit_info(c, fmt, sep="\t"): - r = sh.git( - "log", - "--format={}".format(fmt), - "{}^..{}".format(c, c), - "-n", - "1", - _tty_out=False, - ) - return str(r).split(sep) - - -def get_commit_vitals(c, hlen=HASH_LEN): - h, s, d = get_commit_info(c, "%H\t%s\t%ci", "\t") - return h[:hlen], s, parse(d) - - -def file_filter(state, dirname, fnames): - if args.dir_masks and not any(re.search(x, dirname) for x in args.dir_masks): - return - for f in fnames: - p = os.path.abspath(os.path.join(os.path.realpath(dirname), f)) - if any(re.search(x, f) for x in args.file_masks) or any( - re.search(x, p) for x in args.path_masks - ): - if os.path.isfile(p): - state["files"].append(p) - - -def search(defname, head_commit="HEAD"): - HEAD, s = get_commit_vitals("HEAD")[:2] - logger.info("HEAD at %s: %s" % (HEAD, s)) - done_commits = set() - # allhits = set() - files = [] - state = dict(files=files) - os.walk(".", file_filter, state) - # files now holds a list of paths to files - - # seed with hits from q - allhits = set(get_hits(defname, files=files)) - q = {HEAD} - try: - while q: - h = q.pop() - clean_checkout(h) - hits = get_hits(defname, files=files) - for x in hits: - prevc = get_commit_vitals(x.commit + "^")[0] - if prevc not in done_commits: - q.add(prevc) - allhits.update(hits) - done_commits.add(h) - - logger.debug("Remaining: %s" % q) - finally: - logger.info("Restoring HEAD to %s" % HEAD) - clean_checkout(HEAD) - return allhits - - -def pprint_hits(hits): - SUBJ_LEN = 50 - PATH_LEN = 20 - hits = list(hits) - max_p = 0 - for hit in hits: - p = hit.path.split(os.path.realpath(os.curdir) + os.path.sep)[-1] - max_p = max(max_p, len(p)) - - if max_p < PATH_LEN: - SUBJ_LEN += PATH_LEN - max_p - PATH_LEN = max_p - - def sorter(i): - h, s, d = get_commit_vitals(hits[i].commit) - return hits[i].path, d - - 
print( - ("\nThese commits touched the %s method in these files on these dates:\n") - % args.funcname - ) - for i in sorted(range(len(hits)), key=sorter): - hit = hits[i] - h, s, d = get_commit_vitals(hit.commit) - p = hit.path.split(os.path.realpath(os.curdir) + os.path.sep)[-1] - - fmt = "{:%d} {:10} {:<%d} {:<%d}" % (HASH_LEN, SUBJ_LEN, PATH_LEN) - if len(s) > SUBJ_LEN: - s = s[: SUBJ_LEN - 5] + " ..." - print(fmt.format(h[:HASH_LEN], d.isoformat()[:10], s, p[-20:])) - - print("\n") - - -def main(): - if not args.saw_the_warning: - argparser.print_help() - print( - """ -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -WARNING: -this script uses git clean -f, running it on a repo with untracked files. -It's recommended that you make a fresh clone and run from its root directory. -You must specify the -y argument to ignore this warning. -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -""" - ) - return - if isinstance(args.file_masks, str): - args.file_masks = args.file_masks.split(",") - if isinstance(args.path_masks, str): - args.path_masks = args.path_masks.split(",") - if isinstance(args.dir_masks, str): - args.dir_masks = args.dir_masks.split(",") - - logger.setLevel(getattr(logging, args.debug_level)) - - hits = search(args.funcname) - pprint_hits(hits) - - -if __name__ == "__main__": - import sys - - sys.exit(main()) From 421f654d8249104a4c2da2b41adaac91b7794587 Mon Sep 17 00:00:00 2001 From: RaisaDZ <34237447+RaisaDZ@users.noreply.github.com> Date: Thu, 20 Feb 2020 01:05:59 +0000 Subject: [PATCH 076/388] Replace old string formatting syntax with f-strings (#32122) --- scripts/validate_docstrings.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index d43086756769a..051bd5b9761ae 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -243,14 +243,12 @@ def pandas_validate(func_name: str): "EX03", error_code=err.error_code, error_message=err.message, - times_happening=" ({} times)".format(err.count) - if err.count > 1 - else "", + times_happening=f" ({err.count} times)" if err.count > 1 else "", ) ) examples_source_code = "".join(doc.examples_source_code) for wrong_import in ("numpy", "pandas"): - if "import {}".format(wrong_import) in examples_source_code: + if f"import {wrong_import}" in examples_source_code: result["errors"].append( pandas_error("EX04", imported_library=wrong_import) ) @@ -345,9 +343,7 @@ def header(title, width=80, char="#"): full_line = char * width side_len = (width - len(title) - 2) // 2 adj = "" if len(title) % 2 == 0 else " " - title_line = "{side} {title}{adj} {side}".format( - side=char * side_len, title=title, adj=adj - ) + title_line = f"{char * side_len} {title}{adj} {char * side_len}" return f"\n{full_line}\n{title_line}\n{full_line}\n\n" From 96644d05e4e7d6791af1a6be60fc8af3b85ad27c Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 20 Feb 2020 01:24:37 +0000 Subject: [PATCH 077/388] TYP: check_untyped_defs core.tools.datetimes (#32101) --- pandas/core/tools/datetimes.py | 20 ++++++++++++-------- setup.cfg | 3 --- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 6d45ddd29d783..b10b736b9134e 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -2,7 +2,7 @@ from datetime import datetime, time from functools import partial from itertools import 
islice -from typing import Optional, TypeVar, Union +from typing import List, Optional, TypeVar, Union import numpy as np @@ -296,7 +296,9 @@ def _convert_listlike_datetimes( if not isinstance(arg, (DatetimeArray, DatetimeIndex)): return DatetimeIndex(arg, tz=tz, name=name) if tz == "utc": - arg = arg.tz_convert(None).tz_localize(tz) + # error: Item "DatetimeIndex" of "Union[DatetimeArray, DatetimeIndex]" has + # no attribute "tz_convert" + arg = arg.tz_convert(None).tz_localize(tz) # type: ignore return arg elif is_datetime64_ns_dtype(arg): @@ -307,7 +309,9 @@ def _convert_listlike_datetimes( pass elif tz: # DatetimeArray, DatetimeIndex - return arg.tz_localize(tz) + # error: Item "DatetimeIndex" of "Union[DatetimeArray, DatetimeIndex]" has + # no attribute "tz_localize" + return arg.tz_localize(tz) # type: ignore return arg @@ -826,18 +830,18 @@ def f(value): required = ["year", "month", "day"] req = sorted(set(required) - set(unit_rev.keys())) if len(req): - required = ",".join(req) + _required = ",".join(req) raise ValueError( "to assemble mappings requires at least that " - f"[year, month, day] be specified: [{required}] is missing" + f"[year, month, day] be specified: [{_required}] is missing" ) # keys we don't recognize excess = sorted(set(unit_rev.keys()) - set(_unit_map.values())) if len(excess): - excess = ",".join(excess) + _excess = ",".join(excess) raise ValueError( - f"extra keys have been passed to the datetime assemblage: [{excess}]" + f"extra keys have been passed to the datetime assemblage: [{_excess}]" ) def coerce(values): @@ -992,7 +996,7 @@ def _convert_listlike(arg, format): if infer_time_format and format is None: format = _guess_time_format_for_array(arg) - times = [] + times: List[Optional[time]] = [] if format is not None: for element in arg: try: diff --git a/setup.cfg b/setup.cfg index 4b0b32b09ca8d..87054f864813f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -231,9 +231,6 @@ check_untyped_defs=False [mypy-pandas.core.strings] check_untyped_defs=False -[mypy-pandas.core.tools.datetimes] -check_untyped_defs=False - [mypy-pandas.core.window.common] check_untyped_defs=False From 60b8f05da7bd6f79909fe77f097fe4fb587143ae Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Wed, 19 Feb 2020 22:09:20 -0600 Subject: [PATCH 078/388] CLN: Clean groupby/test_function.py (#32027) --- pandas/tests/groupby/test_function.py | 223 ++++++++++++++++---------- 1 file changed, 134 insertions(+), 89 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 176c0272ca527..6205dfb87bbd0 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -26,6 +26,26 @@ from pandas.util import _test_decorators as td +@pytest.fixture( + params=[np.int32, np.int64, np.float32, np.float64], + ids=["np.int32", "np.int64", "np.float32", "np.float64"], +) +def numpy_dtypes_for_minmax(request): + """ + Fixture of numpy dtypes with min and max values used for testing + cummin and cummax + """ + dtype = request.param + min_val = ( + np.iinfo(dtype).min if np.dtype(dtype).kind == "i" else np.finfo(dtype).min + ) + max_val = ( + np.iinfo(dtype).max if np.dtype(dtype).kind == "i" else np.finfo(dtype).max + ) + + return (dtype, min_val, max_val) + + @pytest.mark.parametrize("agg_func", ["any", "all"]) @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize( @@ -174,11 +194,10 @@ def test_arg_passthru(): ) for attr in ["mean", "median"]: - f = 
getattr(df.groupby("group"), attr) - result = f() + result = getattr(df.groupby("group"), attr)() tm.assert_index_equal(result.columns, expected_columns_numeric) - result = f(numeric_only=False) + result = getattr(df.groupby("group"), attr)(numeric_only=False) tm.assert_frame_equal(result.reindex_like(expected), expected) # TODO: min, max *should* handle @@ -195,11 +214,10 @@ def test_arg_passthru(): ] ) for attr in ["min", "max"]: - f = getattr(df.groupby("group"), attr) - result = f() + result = getattr(df.groupby("group"), attr)() tm.assert_index_equal(result.columns, expected_columns) - result = f(numeric_only=False) + result = getattr(df.groupby("group"), attr)(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) expected_columns = Index( @@ -215,29 +233,26 @@ def test_arg_passthru(): ] ) for attr in ["first", "last"]: - f = getattr(df.groupby("group"), attr) - result = f() + result = getattr(df.groupby("group"), attr)() tm.assert_index_equal(result.columns, expected_columns) - result = f(numeric_only=False) + result = getattr(df.groupby("group"), attr)(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) expected_columns = Index(["int", "float", "string", "category_int", "timedelta"]) - for attr in ["sum"]: - f = getattr(df.groupby("group"), attr) - result = f() - tm.assert_index_equal(result.columns, expected_columns_numeric) - result = f(numeric_only=False) - tm.assert_index_equal(result.columns, expected_columns) + result = df.groupby("group").sum() + tm.assert_index_equal(result.columns, expected_columns_numeric) + + result = df.groupby("group").sum(numeric_only=False) + tm.assert_index_equal(result.columns, expected_columns) expected_columns = Index(["int", "float", "category_int"]) for attr in ["prod", "cumprod"]: - f = getattr(df.groupby("group"), attr) - result = f() + result = getattr(df.groupby("group"), attr)() tm.assert_index_equal(result.columns, expected_columns_numeric) - result = f(numeric_only=False) + result = getattr(df.groupby("group"), attr)(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) # like min, max, but don't include strings @@ -245,22 +260,20 @@ def test_arg_passthru(): ["int", "float", "category_int", "datetime", "datetimetz", "timedelta"] ) for attr in ["cummin", "cummax"]: - f = getattr(df.groupby("group"), attr) - result = f() + result = getattr(df.groupby("group"), attr)() # GH 15561: numeric_only=False set by default like min/max tm.assert_index_equal(result.columns, expected_columns) - result = f(numeric_only=False) + result = getattr(df.groupby("group"), attr)(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) expected_columns = Index(["int", "float", "category_int", "timedelta"]) - for attr in ["cumsum"]: - f = getattr(df.groupby("group"), attr) - result = f() - tm.assert_index_equal(result.columns, expected_columns_numeric) - result = f(numeric_only=False) - tm.assert_index_equal(result.columns, expected_columns) + result = getattr(df.groupby("group"), "cumsum")() + tm.assert_index_equal(result.columns, expected_columns_numeric) + + result = getattr(df.groupby("group"), "cumsum")(numeric_only=False) + tm.assert_index_equal(result.columns, expected_columns) def test_non_cython_api(): @@ -691,59 +704,31 @@ def test_numpy_compat(func): reason="https://github.com/pandas-dev/pandas/issues/31992", strict=False, ) -def test_cummin_cummax(): +def test_cummin(numpy_dtypes_for_minmax): + dtype = numpy_dtypes_for_minmax[0] + min_val = 
numpy_dtypes_for_minmax[1] + # GH 15048 - num_types = [np.int32, np.int64, np.float32, np.float64] - num_mins = [ - np.iinfo(np.int32).min, - np.iinfo(np.int64).min, - np.finfo(np.float32).min, - np.finfo(np.float64).min, - ] - num_max = [ - np.iinfo(np.int32).max, - np.iinfo(np.int64).max, - np.finfo(np.float32).max, - np.finfo(np.float64).max, - ] base_df = pd.DataFrame( {"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]} ) expected_mins = [3, 3, 3, 2, 2, 2, 2, 1] - expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3] - for dtype, min_val, max_val in zip(num_types, num_mins, num_max): - df = base_df.astype(dtype) + df = base_df.astype(dtype) - # cummin - expected = pd.DataFrame({"B": expected_mins}).astype(dtype) - result = df.groupby("A").cummin() - tm.assert_frame_equal(result, expected) - result = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() - tm.assert_frame_equal(result, expected) - - # Test cummin w/ min value for dtype - df.loc[[2, 6], "B"] = min_val - expected.loc[[2, 3, 6, 7], "B"] = min_val - result = df.groupby("A").cummin() - tm.assert_frame_equal(result, expected) - expected = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() - tm.assert_frame_equal(result, expected) - - # cummax - expected = pd.DataFrame({"B": expected_maxs}).astype(dtype) - result = df.groupby("A").cummax() - tm.assert_frame_equal(result, expected) - result = df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() - tm.assert_frame_equal(result, expected) + expected = pd.DataFrame({"B": expected_mins}).astype(dtype) + result = df.groupby("A").cummin() + tm.assert_frame_equal(result, expected) + result = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() + tm.assert_frame_equal(result, expected) - # Test cummax w/ max value for dtype - df.loc[[2, 6], "B"] = max_val - expected.loc[[2, 3, 6, 7], "B"] = max_val - result = df.groupby("A").cummax() - tm.assert_frame_equal(result, expected) - expected = df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() - tm.assert_frame_equal(result, expected) + # Test w/ min value for dtype + df.loc[[2, 6], "B"] = min_val + expected.loc[[2, 3, 6, 7], "B"] = min_val + result = df.groupby("A").cummin() + tm.assert_frame_equal(result, expected) + expected = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() + tm.assert_frame_equal(result, expected) # Test nan in some values base_df.loc[[0, 2, 4, 6], "B"] = np.nan @@ -753,30 +738,80 @@ def test_cummin_cummax(): expected = base_df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() tm.assert_frame_equal(result, expected) - expected = pd.DataFrame({"B": [np.nan, 4, np.nan, 4, np.nan, 3, np.nan, 3]}) - result = base_df.groupby("A").cummax() - tm.assert_frame_equal(result, expected) - expected = base_df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() - tm.assert_frame_equal(result, expected) + # GH 15561 + df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(["2001"]))) + expected = pd.Series(pd.to_datetime("2001"), index=[0], name="b") + + result = df.groupby("a")["b"].cummin() + tm.assert_series_equal(expected, result) + + # GH 15635 + df = pd.DataFrame(dict(a=[1, 2, 1], b=[1, 2, 2])) + result = df.groupby("a").b.cummin() + expected = pd.Series([1, 2, 1], name="b") + tm.assert_series_equal(result, expected) + + +@pytest.mark.xfail( + _is_numpy_dev, + reason="https://github.com/pandas-dev/pandas/issues/31992", + strict=False, +) +def test_cummin_all_nan_column(): + base_df = pd.DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [np.nan] * 8}) - # Test nan in entire column - base_df["B"] = 
np.nan expected = pd.DataFrame({"B": [np.nan] * 8}) result = base_df.groupby("A").cummin() tm.assert_frame_equal(expected, result) result = base_df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() tm.assert_frame_equal(expected, result) + + +@pytest.mark.xfail( + _is_numpy_dev, + reason="https://github.com/pandas-dev/pandas/issues/31992", + strict=False, +) +def test_cummax(numpy_dtypes_for_minmax): + dtype = numpy_dtypes_for_minmax[0] + max_val = numpy_dtypes_for_minmax[2] + + # GH 15048 + base_df = pd.DataFrame( + {"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]} + ) + expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3] + + df = base_df.astype(dtype) + + expected = pd.DataFrame({"B": expected_maxs}).astype(dtype) + result = df.groupby("A").cummax() + tm.assert_frame_equal(result, expected) + result = df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() + tm.assert_frame_equal(result, expected) + + # Test w/ max value for dtype + df.loc[[2, 6], "B"] = max_val + expected.loc[[2, 3, 6, 7], "B"] = max_val + result = df.groupby("A").cummax() + tm.assert_frame_equal(result, expected) + expected = df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() + tm.assert_frame_equal(result, expected) + + # Test nan in some values + base_df.loc[[0, 2, 4, 6], "B"] = np.nan + expected = pd.DataFrame({"B": [np.nan, 4, np.nan, 4, np.nan, 3, np.nan, 3]}) result = base_df.groupby("A").cummax() - tm.assert_frame_equal(expected, result) - result = base_df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() - tm.assert_frame_equal(expected, result) + tm.assert_frame_equal(result, expected) + expected = base_df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() + tm.assert_frame_equal(result, expected) # GH 15561 df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(["2001"]))) expected = pd.Series(pd.to_datetime("2001"), index=[0], name="b") - for method in ["cummax", "cummin"]: - result = getattr(df.groupby("a")["b"], method)() - tm.assert_series_equal(expected, result) + + result = df.groupby("a")["b"].cummax() + tm.assert_series_equal(expected, result) # GH 15635 df = pd.DataFrame(dict(a=[1, 2, 1], b=[2, 1, 1])) @@ -784,10 +819,20 @@ def test_cummin_cummax(): expected = pd.Series([2, 1, 2], name="b") tm.assert_series_equal(result, expected) - df = pd.DataFrame(dict(a=[1, 2, 1], b=[1, 2, 2])) - result = df.groupby("a").b.cummin() - expected = pd.Series([1, 2, 1], name="b") - tm.assert_series_equal(result, expected) + +@pytest.mark.xfail( + _is_numpy_dev, + reason="https://github.com/pandas-dev/pandas/issues/31992", + strict=False, +) +def test_cummax_all_nan_column(): + base_df = pd.DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [np.nan] * 8}) + + expected = pd.DataFrame({"B": [np.nan] * 8}) + result = base_df.groupby("A").cummax() + tm.assert_frame_equal(expected, result) + result = base_df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() + tm.assert_frame_equal(expected, result) @pytest.mark.parametrize( From 304209f677ec575a42e0720a2793bd8e72994a43 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Wed, 19 Feb 2020 23:51:07 -0500 Subject: [PATCH 079/388] Added pd.NA to nulls_fixture (#31799) --- pandas/conftest.py | 2 +- pandas/tests/arithmetic/test_interval.py | 8 ++++++++ pandas/tests/indexes/multi/test_indexing.py | 4 ++++ pandas/tests/indexes/test_base.py | 7 +++++++ 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 7851cba9cd91a..d19bf85877140 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -441,7 +441,7 
@@ def other_closed(request): return request.param -@pytest.fixture(params=[None, np.nan, pd.NaT, float("nan"), np.float("NaN")]) +@pytest.fixture(params=[None, np.nan, pd.NaT, float("nan"), np.float("NaN"), pd.NA]) def nulls_fixture(request): """ Fixture for each null type in pandas. diff --git a/pandas/tests/arithmetic/test_interval.py b/pandas/tests/arithmetic/test_interval.py index f9e1a515277d5..3f85ac8c190db 100644 --- a/pandas/tests/arithmetic/test_interval.py +++ b/pandas/tests/arithmetic/test_interval.py @@ -129,6 +129,10 @@ def test_compare_scalar_interval_mixed_closed(self, op, closed, other_closed): def test_compare_scalar_na(self, op, array, nulls_fixture): result = op(array, nulls_fixture) expected = self.elementwise_comparison(op, array, nulls_fixture) + + if nulls_fixture is pd.NA and array.dtype != pd.IntervalDtype("int"): + pytest.xfail("broken for non-integer IntervalArray; see GH 31882") + tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize( @@ -207,6 +211,10 @@ def test_compare_list_like_nan(self, op, array, nulls_fixture): other = [nulls_fixture] * 4 result = op(array, other) expected = self.elementwise_comparison(op, array, other) + + if nulls_fixture is pd.NA: + pytest.xfail("broken for non-integer IntervalArray; see GH 31882") + tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 21a4773fa3683..3c9b34a4a1439 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -384,6 +384,10 @@ def test_get_loc_nan(level, nulls_fixture): key = ["b", "d"] levels[level] = np.array([0, nulls_fixture], dtype=type(nulls_fixture)) key[level] = nulls_fixture + + if nulls_fixture is pd.NA: + pytest.xfail("MultiIndex from pd.NA in np.array broken; see GH 31883") + idx = MultiIndex.from_product(levels) assert idx.get_loc(tuple(key)) == 3 diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c64a70af6f2a4..3b4b6b09dcda5 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -305,6 +305,10 @@ def test_index_ctor_infer_nat_dt_like(self, pos, klass, dtype, ctor, nulls_fixtu data = [ctor] data.insert(pos, nulls_fixture) + if nulls_fixture is pd.NA: + expected = Index([pd.NA, pd.NaT]) + pytest.xfail("Broken with np.NaT ctor; see GH 31884") + result = Index(data) tm.assert_index_equal(result, expected) @@ -1964,6 +1968,9 @@ def test_isin_nan_common_float64(self, nulls_fixture): pytest.skip("pd.NaT not compatible with Float64Index") # Float64Index overrides isin, so must be checked separately + if nulls_fixture is pd.NA: + pytest.xfail("Float64Index cannot contain pd.NA") + tm.assert_numpy_array_equal( Float64Index([1.0, nulls_fixture]).isin([np.nan]), np.array([False, True]) ) From 11164c3553eff48b62a42c869fe0610fbcea73db Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 20 Feb 2020 04:44:52 -0800 Subject: [PATCH 080/388] REF: misplaced Series.combine_first tests (#32111) --- .../series/methods/test_combine_first.py | 77 ++++++++++++++++++- pandas/tests/series/test_api.py | 4 - pandas/tests/series/test_combine_concat.py | 72 +---------------- 3 files changed, 77 insertions(+), 76 deletions(-) diff --git a/pandas/tests/series/methods/test_combine_first.py b/pandas/tests/series/methods/test_combine_first.py index aed6425e50117..1ee55fbe39513 100644 --- a/pandas/tests/series/methods/test_combine_first.py +++ 
b/pandas/tests/series/methods/test_combine_first.py @@ -1,6 +1,9 @@ +from datetime import datetime + import numpy as np -from pandas import Period, Series, date_range, period_range +import pandas as pd +from pandas import Period, Series, date_range, period_range, to_datetime import pandas._testing as tm @@ -17,3 +20,75 @@ def test_combine_first_period_datetime(self): result = a.combine_first(b) expected = Series([1, 9, 9, 4, 5, 9, 7], index=idx, dtype=np.float64) tm.assert_series_equal(result, expected) + + def test_combine_first_name(self, datetime_series): + result = datetime_series.combine_first(datetime_series[:5]) + assert result.name == datetime_series.name + + def test_combine_first(self): + values = tm.makeIntIndex(20).values.astype(float) + series = Series(values, index=tm.makeIntIndex(20)) + + series_copy = series * 2 + series_copy[::2] = np.NaN + + # nothing used from the input + combined = series.combine_first(series_copy) + + tm.assert_series_equal(combined, series) + + # Holes filled from input + combined = series_copy.combine_first(series) + assert np.isfinite(combined).all() + + tm.assert_series_equal(combined[::2], series[::2]) + tm.assert_series_equal(combined[1::2], series_copy[1::2]) + + # mixed types + index = tm.makeStringIndex(20) + floats = Series(tm.randn(20), index=index) + strings = Series(tm.makeStringIndex(10), index=index[::2]) + + combined = strings.combine_first(floats) + + tm.assert_series_equal(strings, combined.loc[index[::2]]) + tm.assert_series_equal(floats[1::2].astype(object), combined.loc[index[1::2]]) + + # corner case + ser = Series([1.0, 2, 3], index=[0, 1, 2]) + empty = Series([], index=[], dtype=object) + result = ser.combine_first(empty) + ser.index = ser.index.astype("O") + tm.assert_series_equal(ser, result) + + def test_combine_first_dt64(self): + + s0 = to_datetime(Series(["2010", np.NaN])) + s1 = to_datetime(Series([np.NaN, "2011"])) + rs = s0.combine_first(s1) + xp = to_datetime(Series(["2010", "2011"])) + tm.assert_series_equal(rs, xp) + + s0 = to_datetime(Series(["2010", np.NaN])) + s1 = Series([np.NaN, "2011"]) + rs = s0.combine_first(s1) + xp = Series([datetime(2010, 1, 1), "2011"]) + tm.assert_series_equal(rs, xp) + + def test_combine_first_dt_tz_values(self, tz_naive_fixture): + ser1 = pd.Series( + pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture), + name="ser1", + ) + ser2 = pd.Series( + pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture), + index=[2, 3, 4], + name="ser2", + ) + result = ser1.combine_first(ser2) + exp_vals = pd.DatetimeIndex( + ["20150101", "20150102", "20150103", "20160515", "20160516"], + tz=tz_naive_fixture, + ) + exp = pd.Series(exp_vals, name="ser1") + tm.assert_series_equal(exp, result) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 33706c00c53f4..3e877cf2fc787 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -85,10 +85,6 @@ def test_binop_maybe_preserve_name(self, datetime_series): result = getattr(s, op)(cp) assert result.name is None - def test_combine_first_name(self, datetime_series): - result = datetime_series.combine_first(datetime_series[:5]) - assert result.name == datetime_series.name - def test_getitem_preserve_name(self, datetime_series): result = datetime_series[datetime_series > 0] assert result.name == datetime_series.name diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 4cb471597b67a..4afa083e97c7c 100644 --- 
a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -1,10 +1,8 @@ -from datetime import datetime - import numpy as np import pytest import pandas as pd -from pandas import DataFrame, Series, to_datetime +from pandas import DataFrame, Series import pandas._testing as tm @@ -22,42 +20,6 @@ def test_combine_scalar(self): expected = pd.Series([min(i * 10, 22) for i in range(5)]) tm.assert_series_equal(result, expected) - def test_combine_first(self): - values = tm.makeIntIndex(20).values.astype(float) - series = Series(values, index=tm.makeIntIndex(20)) - - series_copy = series * 2 - series_copy[::2] = np.NaN - - # nothing used from the input - combined = series.combine_first(series_copy) - - tm.assert_series_equal(combined, series) - - # Holes filled from input - combined = series_copy.combine_first(series) - assert np.isfinite(combined).all() - - tm.assert_series_equal(combined[::2], series[::2]) - tm.assert_series_equal(combined[1::2], series_copy[1::2]) - - # mixed types - index = tm.makeStringIndex(20) - floats = Series(tm.randn(20), index=index) - strings = Series(tm.makeStringIndex(10), index=index[::2]) - - combined = strings.combine_first(floats) - - tm.assert_series_equal(strings, combined.loc[index[::2]]) - tm.assert_series_equal(floats[1::2].astype(object), combined.loc[index[1::2]]) - - # corner case - s = Series([1.0, 2, 3], index=[0, 1, 2]) - empty = Series([], index=[], dtype=object) - result = s.combine_first(empty) - s.index = s.index.astype("O") - tm.assert_series_equal(s, result) - def test_update(self): s = Series([1.5, np.nan, 3.0, 4.0, np.nan]) s2 = Series([np.nan, 3.5, np.nan, 5.0]) @@ -156,24 +118,6 @@ def get_result_type(dtype, dtype2): result = pd.concat([Series(dtype=dtype), Series(dtype=dtype2)]).dtype assert result.kind == expected - def test_combine_first_dt_tz_values(self, tz_naive_fixture): - ser1 = pd.Series( - pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture), - name="ser1", - ) - ser2 = pd.Series( - pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture), - index=[2, 3, 4], - name="ser2", - ) - result = ser1.combine_first(ser2) - exp_vals = pd.DatetimeIndex( - ["20150101", "20150102", "20150103", "20160515", "20160516"], - tz=tz_naive_fixture, - ) - exp = pd.Series(exp_vals, name="ser1") - tm.assert_series_equal(exp, result) - def test_concat_empty_series_dtypes(self): # booleans @@ -250,17 +194,3 @@ def test_concat_empty_series_dtypes(self): # TODO: release-note: concat sparse dtype expected = pd.SparseDtype("object") assert result.dtype == expected - - def test_combine_first_dt64(self): - - s0 = to_datetime(Series(["2010", np.NaN])) - s1 = to_datetime(Series([np.NaN, "2011"])) - rs = s0.combine_first(s1) - xp = to_datetime(Series(["2010", "2011"])) - tm.assert_series_equal(rs, xp) - - s0 = to_datetime(Series(["2010", np.NaN])) - s1 = Series([np.NaN, "2011"]) - rs = s0.combine_first(s1) - xp = Series([datetime(2010, 1, 1), "2011"]) - tm.assert_series_equal(rs, xp) From 37a700699235be283078847447ffdd00a244a295 Mon Sep 17 00:00:00 2001 From: Amy Graham <57148028+amy-graham-js@users.noreply.github.com> Date: Thu, 20 Feb 2020 15:01:12 +0000 Subject: [PATCH 081/388] added msg to TypeError test_to_boolean_array_error (#32103) --- pandas/tests/arrays/test_boolean.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index cb9b07db4a0df..d14d6f3ff0c41 100644 --- 
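The tests collected above pin down ``Series.combine_first``, which fills holes in the
calling Series from another Series and unions the two indexes. A minimal sketch of that
behavior (the values here are illustrative, not taken from the suite):

    import numpy as np
    import pandas as pd

    s1 = pd.Series([1.0, np.nan, 3.0])
    s2 = pd.Series([10.0, 2.0, 30.0, 4.0])

    # the NaN in s1 is filled from s2, and index 3 comes from s2 alone
    print(s1.combine_first(s2).tolist())  # [1.0, 2.0, 3.0, 4.0]
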
a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -131,7 +131,8 @@ def test_to_boolean_array_missing_indicators(a, b): ) def test_to_boolean_array_error(values): # error in converting existing arrays to BooleanArray - with pytest.raises(TypeError): + msg = "Need to pass bool-like value" + with pytest.raises(TypeError, match=msg): pd.array(values, dtype="boolean") From 27e18e5397cb02f4e15eeadc99fec1d80f850668 Mon Sep 17 00:00:00 2001 From: Mabroor Ahmed Date: Thu, 20 Feb 2020 16:39:43 +0000 Subject: [PATCH 082/388] TST: Fix bare pytest.raises in test_parsing.py (#32102) --- pandas/tests/tslibs/test_parsing.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 1ba7832c47e6c..dc7421ea63464 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -2,6 +2,7 @@ Tests for Timestamp parsing, aimed at pandas/_libs/tslibs/parsing.pyx """ from datetime import datetime +import re from dateutil.parser import parse import numpy as np @@ -24,7 +25,8 @@ def test_parse_time_string(): def test_parse_time_string_invalid_type(): # Raise on invalid input, don't just return it - with pytest.raises(TypeError): + msg = "Argument 'arg' has incorrect type (expected str, got tuple)" + with pytest.raises(TypeError, match=re.escape(msg)): parse_time_string((4, 5)) @@ -217,7 +219,8 @@ def test_try_parse_dates(): def test_parse_time_string_check_instance_type_raise_exception(): # issue 20684 - with pytest.raises(TypeError): + msg = "Argument 'arg' has incorrect type (expected str, got tuple)" + with pytest.raises(TypeError, match=re.escape(msg)): parse_time_string((1, 2, 3)) result = parse_time_string("2019") From 2c1c36f505678b6776ead1e84643eb17200f82ce Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 20 Feb 2020 22:37:24 +0000 Subject: [PATCH 083/388] TYP: check_untyped_defs arrays.sparse.array (#32099) --- pandas/core/arrays/sparse/array.py | 12 ++++++------ setup.cfg | 3 --- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index b17a4647ffc9f..542cfd334b810 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -4,7 +4,7 @@ from collections import abc import numbers import operator -from typing import Any, Callable +from typing import Any, Callable, Union import warnings import numpy as np @@ -479,7 +479,7 @@ def sp_index(self): return self._sparse_index @property - def sp_values(self): + def sp_values(self) -> np.ndarray: """ An ndarray containing the non- ``fill_value`` values. 
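As a rough illustration of the ``SparseArray`` API whose annotations are tightened in
this patch (``sp_values`` yields a plain ndarray, ``take`` yields a new
``SparseArray``), a small sketch with made-up values:

    import numpy as np
    import pandas as pd

    arr = pd.arrays.SparseArray([0, 0, 1, 2], fill_value=0)
    print(arr.sp_values)                  # ndarray of the non-fill values: [1 2]
    print(arr.take(np.array([2, 0, 3])))  # a new SparseArray: [1, 0, 2]
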
@@ -798,13 +798,13 @@ def _get_val_at(self, loc): val = com.maybe_box_datetimelike(val, self.sp_values.dtype) return val - def take(self, indices, allow_fill=False, fill_value=None): + def take(self, indices, allow_fill=False, fill_value=None) -> "SparseArray": if is_scalar(indices): raise ValueError(f"'indices' must be an array, not a scalar '{indices}'.") indices = np.asarray(indices, dtype=np.int32) if indices.size == 0: - result = [] + result = np.array([], dtype="object") kwargs = {"dtype": self.dtype} elif allow_fill: result = self._take_with_fill(indices, fill_value=fill_value) @@ -815,7 +815,7 @@ def take(self, indices, allow_fill=False, fill_value=None): return type(self)(result, fill_value=self.fill_value, kind=self.kind, **kwargs) - def _take_with_fill(self, indices, fill_value=None): + def _take_with_fill(self, indices, fill_value=None) -> np.ndarray: if fill_value is None: fill_value = self.dtype.na_value @@ -878,7 +878,7 @@ def _take_with_fill(self, indices, fill_value=None): return taken - def _take_without_fill(self, indices): + def _take_without_fill(self, indices) -> Union[np.ndarray, "SparseArray"]: to_shift = indices < 0 indices = indices.copy() diff --git a/setup.cfg b/setup.cfg index 87054f864813f..61d5b1030a500 100644 --- a/setup.cfg +++ b/setup.cfg @@ -150,9 +150,6 @@ check_untyped_defs=False [mypy-pandas.core.arrays.interval] check_untyped_defs=False -[mypy-pandas.core.arrays.sparse.array] -check_untyped_defs=False - [mypy-pandas.core.base] check_untyped_defs=False From a12ab063e2e6eaf6a10518e4374d62fb4c7727eb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 20 Feb 2020 14:37:37 -0800 Subject: [PATCH 084/388] CLN: simplify _setitem_with_indexer (#31887) --- pandas/core/indexing.py | 49 +++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index d3e75d43c6bd7..081f87078d9c9 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1564,6 +1564,17 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): # ------------------------------------------------------------------- def _setitem_with_indexer(self, indexer, value): + """ + _setitem_with_indexer is for setting values on a Series/DataFrame + using positional indexers. + + If the relevant keys are not present, the Series/DataFrame may be + expanded. + + This method is currently broken when dealing with non-unique Indexes, + since it goes from positional indexers back to labels when calling + BlockManager methods, see GH#12991, GH#22046, GH#15686. 
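In behavioral terms, the docstring added here covers cases like the following sketch
(illustrative only, not part of the diff):

    import pandas as pd

    ser = pd.Series([1, 2], index=["a", "b"])
    ser.loc["c"] = 3     # key not present, so the Series is expanded
    ser.iloc[0] = 10     # purely positional set on an existing location
    print(ser.tolist())  # [10, 2, 3]
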
+ """ # also has the side effect of consolidating in-place from pandas import Series @@ -1678,24 +1689,19 @@ def _setitem_with_indexer(self, indexer, value): info_idx = [info_idx] labels = item_labels[info_idx] - # if we have a partial multiindex, then need to adjust the plane - # indexer here - if len(labels) == 1 and isinstance( - self.obj[labels[0]].axes[0], ABCMultiIndex - ): + if len(labels) == 1: + # We can operate on a single column item = labels[0] - obj = self.obj[item] - index = obj.index - idx = indexer[:info_axis][0] + idx = indexer[0] - plane_indexer = tuple([idx]) + indexer[info_axis + 1 :] - lplane_indexer = length_of_indexer(plane_indexer[0], index) + plane_indexer = tuple([idx]) + lplane_indexer = length_of_indexer(plane_indexer[0], self.obj.index) # lplane_indexer gives the expected length of obj[idx] # require that we are setting the right number of values that # we are indexing - if is_list_like_indexer(value) and lplane_indexer != len(value): - + if is_list_like_indexer(value) and 0 != lplane_indexer != len(value): + # Exclude zero-len for e.g. boolean masking that is all-false raise ValueError( "cannot set using a multi-index " "selection indexer with a different " @@ -1704,12 +1710,11 @@ def _setitem_with_indexer(self, indexer, value): # non-mi else: - plane_indexer = indexer[:info_axis] + indexer[info_axis + 1 :] - plane_axis = self.obj.axes[:info_axis][0] - lplane_indexer = length_of_indexer(plane_indexer[0], plane_axis) + plane_indexer = indexer[:1] + lplane_indexer = length_of_indexer(plane_indexer[0], self.obj.index) def setter(item, v): - s = self.obj[item] + ser = self.obj[item] pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer # perform the equivalent of a setitem on the info axis @@ -1721,16 +1726,16 @@ def setter(item, v): com.is_null_slice(idx) or com.is_full_slice(idx, len(self.obj)) for idx in pi ): - s = v + ser = v else: # set the item, possibly having a dtype change - s._consolidate_inplace() - s = s.copy() - s._data = s._data.setitem(indexer=pi, value=v) - s._maybe_update_cacher(clear=True) + ser._consolidate_inplace() + ser = ser.copy() + ser._data = ser._data.setitem(indexer=pi, value=v) + ser._maybe_update_cacher(clear=True) # reset the sliced object if unique - self.obj[item] = s + self.obj[item] = ser # we need an iterable, with a ndim of at least 1 # eg. 
don't pass through np.array(0) From 73b262284bf879dec713d7b34adf83344824c5bc Mon Sep 17 00:00:00 2001 From: 3vts <3vts@users.noreply.github.com> Date: Thu, 20 Feb 2020 17:32:41 -0600 Subject: [PATCH 085/388] CLN: 29547 replace old string formatting (#32034) --- pandas/core/arrays/interval.py | 12 ++-- pandas/core/util/hashing.py | 2 +- pandas/io/formats/format.py | 65 ++++++++----------- pandas/io/formats/html.py | 29 +++------ pandas/io/formats/latex.py | 39 +++++------ pandas/io/formats/printing.py | 2 +- pandas/io/parsers.py | 4 +- .../tests/arrays/categorical/test_dtypes.py | 10 ++- .../arrays/categorical/test_operators.py | 12 ++-- 9 files changed, 71 insertions(+), 104 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index ab3ee5bbcdc3a..b11736248c12a 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1127,8 +1127,8 @@ def __arrow_array__(self, type=None): subtype = pyarrow.from_numpy_dtype(self.dtype.subtype) except TypeError: raise TypeError( - "Conversion to arrow with subtype '{}' " - "is not supported".format(self.dtype.subtype) + f"Conversion to arrow with subtype '{self.dtype.subtype}' " + "is not supported" ) interval_type = ArrowIntervalType(subtype, self.closed) storage_array = pyarrow.StructArray.from_arrays( @@ -1157,14 +1157,12 @@ def __arrow_array__(self, type=None): if not type.equals(interval_type): raise TypeError( "Not supported to convert IntervalArray to type with " - "different 'subtype' ({0} vs {1}) and 'closed' ({2} vs {3}) " - "attributes".format( - self.dtype.subtype, type.subtype, self.closed, type.closed - ) + f"different 'subtype' ({self.dtype.subtype} vs {type.subtype}) " + f"and 'closed' ({self.closed} vs {type.closed}) attributes" ) else: raise TypeError( - "Not supported to convert IntervalArray to '{0}' type".format(type) + f"Not supported to convert IntervalArray to '{type}' type" ) return pyarrow.ExtensionArray.from_storage(interval_type, storage_array) diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 160d328ec16ec..d9c8611c94cdb 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -294,7 +294,7 @@ def hash_array( elif issubclass(dtype.type, (np.datetime64, np.timedelta64)): vals = vals.view("i8").astype("u8", copy=False) elif issubclass(dtype.type, np.number) and dtype.itemsize <= 8: - vals = vals.view("u{}".format(vals.dtype.itemsize)).astype("u8") + vals = vals.view(f"u{vals.dtype.itemsize}").astype("u8") else: # With repeated values, its MUCH faster to categorize object dtypes, # then hash and rename categories. 
We allow skipping the categorization diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 0693c083c9ddc..b5ddd15c1312a 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -187,7 +187,7 @@ def _get_footer(self) -> str: if self.length: if footer: footer += ", " - footer += "Length: {length}".format(length=len(self.categorical)) + footer += f"Length: {len(self.categorical)}" level_info = self.categorical._repr_categories_info() @@ -217,7 +217,6 @@ def to_string(self) -> str: fmt_values = self._get_formatted_values() - fmt_values = ["{i}".format(i=i) for i in fmt_values] fmt_values = [i.strip() for i in fmt_values] values = ", ".join(fmt_values) result = ["[" + values + "]"] @@ -301,28 +300,26 @@ def _get_footer(self) -> str: assert isinstance( self.series.index, (ABCDatetimeIndex, ABCPeriodIndex, ABCTimedeltaIndex) ) - footer += "Freq: {freq}".format(freq=self.series.index.freqstr) + footer += f"Freq: {self.series.index.freqstr}" if self.name is not False and name is not None: if footer: footer += ", " series_name = pprint_thing(name, escape_chars=("\t", "\r", "\n")) - footer += ( - ("Name: {sname}".format(sname=series_name)) if name is not None else "" - ) + footer += f"Name: {series_name}" if self.length is True or (self.length == "truncate" and self.truncate_v): if footer: footer += ", " - footer += "Length: {length}".format(length=len(self.series)) + footer += f"Length: {len(self.series)}" if self.dtype is not False and self.dtype is not None: - name = getattr(self.tr_series.dtype, "name", None) - if name: + dtype_name = getattr(self.tr_series.dtype, "name", None) + if dtype_name: if footer: footer += ", " - footer += "dtype: {typ}".format(typ=pprint_thing(name)) + footer += f"dtype: {pprint_thing(dtype_name)}" # level infos are added to the end and in a new line, like it is done # for Categoricals @@ -359,9 +356,7 @@ def to_string(self) -> str: footer = self._get_footer() if len(series) == 0: - return "{name}([], {footer})".format( - name=type(self.series).__name__, footer=footer - ) + return f"{type(self.series).__name__}([], {footer})" fmt_index, have_header = self._get_formatted_index() fmt_values = self._get_formatted_values() @@ -584,10 +579,8 @@ def __init__( self.formatters = formatters else: raise ValueError( - ( - "Formatters length({flen}) should match " - "DataFrame number of columns({dlen})" - ).format(flen=len(formatters), dlen=len(frame.columns)) + f"Formatters length({len(formatters)}) should match " + f"DataFrame number of columns({len(frame.columns)})" ) self.na_rep = na_rep self.decimal = decimal @@ -816,10 +809,10 @@ def write_result(self, buf: IO[str]) -> None: frame = self.frame if len(frame.columns) == 0 or len(frame.index) == 0: - info_line = "Empty {name}\nColumns: {col}\nIndex: {idx}".format( - name=type(self.frame).__name__, - col=pprint_thing(frame.columns), - idx=pprint_thing(frame.index), + info_line = ( + f"Empty {type(self.frame).__name__}\n" + f"Columns: {pprint_thing(frame.columns)}\n" + f"Index: {pprint_thing(frame.index)}" ) text = info_line else: @@ -865,11 +858,7 @@ def write_result(self, buf: IO[str]) -> None: buf.writelines(text) if self.should_show_dimensions: - buf.write( - "\n\n[{nrows} rows x {ncols} columns]".format( - nrows=len(frame), ncols=len(frame.columns) - ) - ) + buf.write(f"\n\n[{len(frame)} rows x {len(frame.columns)} columns]") def _join_multiline(self, *args) -> str: lwidth = self.line_width @@ -1074,7 +1063,7 @@ def _get_formatted_index(self, frame: "DataFrame") -> List[str]: # 
empty space for columns if self.show_col_idx_names: - col_header = ["{x}".format(x=x) for x in self._get_column_name_list()] + col_header = [str(x) for x in self._get_column_name_list()] else: col_header = [""] * columns.nlevels @@ -1209,10 +1198,8 @@ def _format_strings(self) -> List[str]: if self.float_format is None: float_format = get_option("display.float_format") if float_format is None: - fmt_str = "{{x: .{prec:d}g}}".format( - prec=get_option("display.precision") - ) - float_format = lambda x: fmt_str.format(x=x) + precision = get_option("display.precision") + float_format = lambda x: f"{x: .{precision:d}g}" else: float_format = self.float_format @@ -1238,10 +1225,10 @@ def _format(x): pass return self.na_rep elif isinstance(x, PandasObject): - return "{x}".format(x=x) + return str(x) else: # object dtype - return "{x}".format(x=formatter(x)) + return str(formatter(x)) vals = self.values if isinstance(vals, Index): @@ -1257,7 +1244,7 @@ def _format(x): fmt_values = [] for i, v in enumerate(vals): if not is_float_type[i] and leading_space: - fmt_values.append(" {v}".format(v=_format(v))) + fmt_values.append(f" {_format(v)}") elif is_float_type[i]: fmt_values.append(float_format(v)) else: @@ -1437,7 +1424,7 @@ def _format_strings(self) -> List[str]: class IntArrayFormatter(GenericArrayFormatter): def _format_strings(self) -> List[str]: - formatter = self.formatter or (lambda x: "{x: d}".format(x=x)) + formatter = self.formatter or (lambda x: f"{x: d}") fmt_values = [formatter(x) for x in self.values] return fmt_values @@ -1716,7 +1703,7 @@ def _formatter(x): x = Timedelta(x) result = x._repr_base(format=format) if box: - result = "'{res}'".format(res=result) + result = f"'{result}'" return result return _formatter @@ -1880,16 +1867,16 @@ def __call__(self, num: Union[int, float]) -> str: prefix = self.ENG_PREFIXES[int_pow10] else: if int_pow10 < 0: - prefix = "E-{pow10:02d}".format(pow10=-int_pow10) + prefix = f"E-{-int_pow10:02d}" else: - prefix = "E+{pow10:02d}".format(pow10=int_pow10) + prefix = f"E+{int_pow10:02d}" mant = sign * dnum / (10 ** pow10) if self.accuracy is None: # pragma: no cover format_str = "{mant: g}{prefix}" else: - format_str = "{{mant: .{acc:d}f}}{{prefix}}".format(acc=self.accuracy) + format_str = f"{{mant: .{self.accuracy:d}f}}{{prefix}}" formatted = format_str.format(mant=mant, prefix=prefix) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index e3161415fe2bc..585e1af3dbc01 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -56,7 +56,7 @@ def __init__( self.table_id = self.fmt.table_id self.render_links = self.fmt.render_links if isinstance(self.fmt.col_space, int): - self.fmt.col_space = "{colspace}px".format(colspace=self.fmt.col_space) + self.fmt.col_space = f"{self.fmt.col_space}px" @property def show_row_idx_names(self) -> bool: @@ -124,7 +124,7 @@ def write_th( """ if header and self.fmt.col_space is not None: tags = tags or "" - tags += 'style="min-width: {colspace};"'.format(colspace=self.fmt.col_space) + tags += f'style="min-width: {self.fmt.col_space};"' self._write_cell(s, kind="th", indent=indent, tags=tags) @@ -135,9 +135,9 @@ def _write_cell( self, s: Any, kind: str = "td", indent: int = 0, tags: Optional[str] = None ) -> None: if tags is not None: - start_tag = "<{kind} {tags}>".format(kind=kind, tags=tags) + start_tag = f"<{kind} {tags}>" else: - start_tag = "<{kind}>".format(kind=kind) + start_tag = f"<{kind}>" if self.escape: # escape & first to prevent double escaping of & @@ -149,17 +149,12 @@ 
def _write_cell(
         if self.render_links and is_url(rs):
             rs_unescaped = pprint_thing(s, escape_chars={}).strip()
 
-            start_tag += '<a href="{url}" target="_blank">'.format(url=rs_unescaped)
+            start_tag += f'<a href="{rs_unescaped}" target="_blank">'
             end_a = "</a>"
         else:
             end_a = ""
 
-        self.write(
-            "{start}{rs}{end_a}</{kind}>".format(
-                start=start_tag, rs=rs, end_a=end_a, kind=kind
-            ),
-            indent,
-        )
+        self.write(f"{start_tag}{rs}{end_a}</{kind}>", indent)
 
     def write_tr(
         self,
@@ -177,7 +172,7 @@ def write_tr(
         if align is None:
             self.write("<tr>", indent)
         else:
-            self.write('<tr style="text-align: {align};">'.format(align=align), indent)
+            self.write(f'<tr style="text-align: {align};">', indent)
 
         indent += indent_delta
         for i, s in enumerate(line):
@@ -196,9 +191,7 @@ def render(self) -> List[str]:
         if self.should_show_dimensions:
             by = chr(215)  # ×
             self.write(
-                "<p>{rows} rows {by} {cols} columns</p>".format(
-                    rows=len(self.frame), by=by, cols=len(self.frame.columns)
-                )
+                f"<p>{len(self.frame)} rows {by} {len(self.frame.columns)} columns</p>"
             )
 
         return self.elements
@@ -224,12 +217,10 @@ def _write_table(self, indent: int = 0) -> None:
         if self.table_id is None:
             id_section = ""
         else:
-            id_section = ' id="{table_id}"'.format(table_id=self.table_id)
+            id_section = f' id="{self.table_id}"'
 
         self.write(
-            '<table border="{border}" class="{cls}"{id_section}>'.format(
-                border=self.border, cls=" ".join(_classes), id_section=id_section
-            ),
+            f'<table border="{self.border}" class="{" ".join(_classes)}"{id_section}>',
             indent,
         )
 
diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py
index 935762598f78a..3a3ca84642d51 100644
--- a/pandas/io/formats/latex.py
+++ b/pandas/io/formats/latex.py
@@ -58,10 +58,10 @@ def write_result(self, buf: IO[str]) -> None:
         """
         # string representation of the columns
         if len(self.frame.columns) == 0 or len(self.frame.index) == 0:
-            info_line = "Empty {name}\nColumns: {col}\nIndex: {idx}".format(
-                name=type(self.frame).__name__,
-                col=self.frame.columns,
-                idx=self.frame.index,
+            info_line = (
+                f"Empty {type(self.frame).__name__}\n"
+                f"Columns: {self.frame.columns}\n"
+                f"Index: {self.frame.index}"
             )
             strcols = [[info_line]]
         else:
@@ -140,8 +140,8 @@ def pad_empties(x):
                     buf.write("\\endhead\n")
                     buf.write("\\midrule\n")
                     buf.write(
-                        "\\multicolumn{{{n}}}{{r}}{{{{Continued on next "
-                        "page}}}} \\\\\n".format(n=len(row))
+                        f"\\multicolumn{{{len(row)}}}{{r}}"
+                        "{{Continued on next page}} \\\\\n"
                     )
                     buf.write("\\midrule\n")
                     buf.write("\\endfoot\n\n")
@@ -171,7 +171,7 @@ def pad_empties(x):
             if self.bold_rows and self.fmt.index:
                 # bold row labels
                 crow = [
-                    "\\textbf{{{x}}}".format(x=x)
+                    f"\\textbf{{{x}}}"
                     if j < ilevels and x.strip() not in ["", "{}"]
                     else x
                     for j, x in enumerate(crow)
@@ -210,9 +210,8 @@ def append_col():
                 # write multicolumn if needed
                 if ncol > 1:
                     row2.append(
-                        "\\multicolumn{{{ncol:d}}}{{{fmt:s}}}{{{txt:s}}}".format(
-                            ncol=ncol, fmt=self.multicolumn_format, txt=coltext.strip()
-                        )
+                        f"\\multicolumn{{{ncol:d}}}{{{self.multicolumn_format}}}"
+                        f"{{{coltext.strip()}}}"
                     )
                 # don't modify where not needed
                 else:
@@ -255,9 +254,7 @@ def _format_multirow(
                     break
             if nrow > 1:
                 # overwrite non-multirow entry
-                row[j] = "\\multirow{{{nrow:d}}}{{*}}{{{row:s}}}".format(
-                    nrow=nrow, row=row[j].strip()
-                )
+                row[j] = f"\\multirow{{{nrow:d}}}{{*}}{{{row[j].strip()}}}"
                 # save when to end the current block with \cline
                 self.clinebuf.append([i + nrow - 1, j + 1])
         return row
@@ -268,7 +265,7 @@ def _print_cline(self, buf: IO[str], i: int, icol: int) -> None:
         """
         for cl in self.clinebuf:
             if cl[0] == i:
-                buf.write("\\cline{{{cl:d}-{icol:d}}}\n".format(cl=cl[1], icol=icol))
+                buf.write(f"\\cline{{{cl[1]:d}-{icol:d}}}\n")
         # remove entries that have been written to buffer
         self.clinebuf = [x for x in self.clinebuf if x[0] != i]
 
@@ -292,19 +289,19 @@ def _write_tabular_begin(self, buf, column_format: str):
             if self.caption is None:
                 caption_ = ""
             else:
-                caption_ = "\n\\caption{{{}}}".format(self.caption)
+                caption_ = f"\n\\caption{{{self.caption}}}"
 
             if self.label is None:
                 label_ = ""
             else:
-                label_ = "\n\\label{{{}}}".format(self.label)
+                label_ = f"\n\\label{{{self.label}}}"
 
-            buf.write("\\begin{{table}}\n\\centering{}{}\n".format(caption_, label_))
+            buf.write(f"\\begin{{table}}\n\\centering{caption_}{label_}\n")
         else:
             # then write output only in a tabular environment
             pass
 
-        buf.write("\\begin{{tabular}}{{{fmt}}}\n".format(fmt=column_format))
+        buf.write(f"\\begin{{tabular}}{{{column_format}}}\n")
 
     def _write_tabular_end(self, buf):
         """
@@ -340,18 +337,18 @@ def _write_longtable_begin(self, buf, column_format: str):
            <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g 'rcl' for 3 columns
         """
-        buf.write("\\begin{{longtable}}{{{fmt}}}\n".format(fmt=column_format))
+        buf.write(f"\\begin{{longtable}}{{{column_format}}}\n")
 
         if self.caption is not None or self.label is not None:
             if self.caption is None:
                 pass
             else:
-                buf.write("\\caption{{{}}}".format(self.caption))
+                buf.write(f"\\caption{{{self.caption}}}")
 
             if self.label is None:
                 pass
             else:
-                buf.write("\\label{{{}}}".format(self.label))
+
buf.write(f"\\label{{{self.label}}}") # a double-backslash is required at the end of the line # as discussed here: diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 13b18a0b5fb6f..36e774305b577 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -229,7 +229,7 @@ def as_escaped_string( max_seq_items=max_seq_items, ) elif isinstance(thing, str) and quote_strings: - result = "'{thing}'".format(thing=as_escaped_string(thing)) + result = f"'{as_escaped_string(thing)}'" else: result = as_escaped_string(thing) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index d7eb69d3a6048..8a3ad6cb45b57 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -3605,8 +3605,8 @@ def get_rows(self, infer_nrows, skiprows=None): def detect_colspecs(self, infer_nrows=100, skiprows=None): # Regex escape the delimiters - delimiters = "".join(r"\{}".format(x) for x in self.delimiter) - pattern = re.compile("([^{}]+)".format(delimiters)) + delimiters = "".join(fr"\{x}" for x in self.delimiter) + pattern = re.compile(f"([^{delimiters}]+)") rows = self.get_rows(infer_nrows, skiprows) if not rows: raise EmptyDataError("No rows from which to infer column width") diff --git a/pandas/tests/arrays/categorical/test_dtypes.py b/pandas/tests/arrays/categorical/test_dtypes.py index 19746d7d72162..9922a8863ebc2 100644 --- a/pandas/tests/arrays/categorical/test_dtypes.py +++ b/pandas/tests/arrays/categorical/test_dtypes.py @@ -92,22 +92,20 @@ def test_codes_dtypes(self): result = Categorical(["foo", "bar", "baz"]) assert result.codes.dtype == "int8" - result = Categorical(["foo{i:05d}".format(i=i) for i in range(400)]) + result = Categorical([f"foo{i:05d}" for i in range(400)]) assert result.codes.dtype == "int16" - result = Categorical(["foo{i:05d}".format(i=i) for i in range(40000)]) + result = Categorical([f"foo{i:05d}" for i in range(40000)]) assert result.codes.dtype == "int32" # adding cats result = Categorical(["foo", "bar", "baz"]) assert result.codes.dtype == "int8" - result = result.add_categories(["foo{i:05d}".format(i=i) for i in range(400)]) + result = result.add_categories([f"foo{i:05d}" for i in range(400)]) assert result.codes.dtype == "int16" # removing cats - result = result.remove_categories( - ["foo{i:05d}".format(i=i) for i in range(300)] - ) + result = result.remove_categories([f"foo{i:05d}" for i in range(300)]) assert result.codes.dtype == "int8" @pytest.mark.parametrize("ordered", [True, False]) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index 0c830c65e0f8b..6ea003c122eea 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -338,7 +338,7 @@ def test_compare_unordered_different_order(self): def test_numeric_like_ops(self): df = DataFrame({"value": np.random.randint(0, 10000, 100)}) - labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] + labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)] cat_labels = Categorical(labels, labels) df = df.sort_values(by=["value"], ascending=True) @@ -353,9 +353,7 @@ def test_numeric_like_ops(self): ("__mul__", r"\*"), ("__truediv__", "/"), ]: - msg = r"Series cannot perform the operation {}|unsupported operand".format( - str_rep - ) + msg = f"Series cannot perform the operation {str_rep}|unsupported operand" with pytest.raises(TypeError, match=msg): getattr(df, op)(df) @@ -363,7 +361,7 @@ def test_numeric_like_ops(self): # min/max) s 
= df["value_group"] for op in ["kurt", "skew", "var", "std", "mean", "sum", "median"]: - msg = "Categorical cannot perform the operation {}".format(op) + msg = f"Categorical cannot perform the operation {op}" with pytest.raises(TypeError, match=msg): getattr(s, op)(numeric_only=False) @@ -383,9 +381,7 @@ def test_numeric_like_ops(self): ("__mul__", r"\*"), ("__truediv__", "/"), ]: - msg = r"Series cannot perform the operation {}|unsupported operand".format( - str_rep - ) + msg = f"Series cannot perform the operation {str_rep}|unsupported operand" with pytest.raises(TypeError, match=msg): getattr(s, op)(2) From 38e16c496603b6c5ea69e830f7657b9f574c9967 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 20 Feb 2020 18:15:03 -0800 Subject: [PATCH 086/388] CLN: remove unused tm.isiterable (#32137) --- pandas/_testing.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index 7ebf2c282f8c9..c7c2e315812ac 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -806,10 +806,6 @@ def assert_is_valid_plot_return_object(objs): assert isinstance(objs, (plt.Artist, tuple, dict)), msg -def isiterable(obj): - return hasattr(obj, "__iter__") - - def assert_is_sorted(seq): """Assert that the sequence is sorted.""" if isinstance(seq, (Index, Series)): From cb4f739c1363833044c4e794c431f86288f5bcdd Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 21 Feb 2020 14:11:35 +0100 Subject: [PATCH 087/388] CI: skip geopandas downstream test (Anaconda installation issue) (#32148) --- pandas/tests/test_downstream.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index 122ef1f47968e..b2a85b539fd86 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -107,6 +107,7 @@ def test_pandas_datareader(): # importing from pandas, Cython import warning @pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") +@pytest.mark.skip(reason="Anaconda installation issue - GH32144") def test_geopandas(): geopandas = import_module("geopandas") # noqa From c05ef6fe0276c4c2f7403eda5c87cbaea2a60aa5 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 21 Feb 2020 09:48:50 -0500 Subject: [PATCH 088/388] BUG: Fix for convert_dtypes with mix of int and string (#32126) --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/core/dtypes/cast.py | 5 ---- .../series/methods/test_convert_dtypes.py | 28 +++++++++++++++++-- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index c9031ac1ae9fe..e0d2de634bf3d 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -45,6 +45,7 @@ Bug fixes - Fix bug in :meth:`DataFrame.convert_dtypes` for columns that were already using the ``"string"`` dtype (:issue:`31731`). - Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`) +- Fix bug in :meth:`Series.convert_dtypes` for series with mix of integers and strings (:issue:`32117`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 1c969d40c2c7f..c2b600b5d8c5b 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1062,11 +1062,6 @@ def convert_dtypes( if convert_integer: target_int_dtype = "Int64" - if isinstance(inferred_dtype, str) and ( - inferred_dtype == "mixed-integer" - or inferred_dtype == "mixed-integer-float" - ): - inferred_dtype = target_int_dtype if is_integer_dtype(input_array.dtype) and not is_extension_array_dtype( input_array.dtype ): diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index a6b5fed40a9d7..17527a09f07a1 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -81,6 +81,18 @@ class TestSeriesConvertDtypes: ), }, ), + ( # GH32117 + ["h", "i", 1], + np.dtype("O"), + { + ( + (True, False), + (True, False), + (True, False), + (True, False), + ): np.dtype("O"), + }, + ), ( [10, np.nan, 20], np.dtype("float"), @@ -144,11 +156,23 @@ class TestSeriesConvertDtypes: [1, 2.0], object, { - ((True, False), (True, False), (True,), (True, False)): "Int64", + ((True,), (True, False), (True,), (True, False)): "Int64", ((True,), (True, False), (False,), (True, False)): np.dtype( "float" ), - ((False,), (True, False), (False,), (True, False)): np.dtype( + ((False,), (True, False), (True, False), (True, False)): np.dtype( + "object" + ), + }, + ), + ( + [1, 2.5], + object, + { + ((True,), (True, False), (True, False), (True, False)): np.dtype( + "float" + ), + ((False,), (True, False), (True, False), (True, False)): np.dtype( "object" ), }, From 7d37ab85c9df9561653c659f29c5d7fca1454c67 Mon Sep 17 00:00:00 2001 From: Pedro Reys Date: Fri, 21 Feb 2020 11:52:23 -0300 Subject: [PATCH 089/388] REGR: read_pickle fallback to encoding=latin_1 upon a UnicodeDecodeError (#32055) When a reading a pickle with MultiIndex columns generated in py27 `pickle_compat.load()` with `enconding=None` would throw an UnicodeDecodeError when reading a pickle created in py27. Now, `read_pickle` catches that exception and fallback to use `latin-1` explicitly. --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/io/pickle.py | 25 ++++++++++--------- pandas/tests/io/data/pickle/test_mi_py27.pkl | Bin 0 -> 1395 bytes pandas/tests/io/test_pickle.py | 17 ++++++++++--- 4 files changed, 27 insertions(+), 16 deletions(-) create mode 100644 pandas/tests/io/data/pickle/test_mi_py27.pkl diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index e0d2de634bf3d..da79f651b63a9 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -19,6 +19,7 @@ Fixed regressions - Fixed regression in :meth:`Series.align` when ``other`` is a DataFrame and ``method`` is not None (:issue:`31785`) - Fixed regression in :meth:`pandas.core.groupby.RollingGroupby.apply` where the ``raw`` parameter was ignored (:issue:`31754`) - Fixed regression in :meth:`rolling(..).corr() ` when using a time offset (:issue:`31789`) +- Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`). 
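To make the ``convert_dtypes`` fix above concrete, a short sketch of the intended dtype
inference (illustrative values):

    import pandas as pd

    # pure integers can safely become the nullable Int64 dtype ...
    print(pd.Series([1, 2, 3]).convert_dtypes().dtype)      # Int64
    # ... but a mix of strings and integers must stay object (GH 32117)
    print(pd.Series(["h", "i", 1]).convert_dtypes().dtype)  # object
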
- Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) - diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index e51f24b551f31..4e731b8ecca11 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -171,21 +171,22 @@ def read_pickle( # 1) try standard library Pickle # 2) try pickle_compat (older pandas version) to handle subclass changes - - excs_to_catch = (AttributeError, ImportError, ModuleNotFoundError) + # 3) try pickle_compat with latin-1 encoding upon a UnicodeDecodeError try: - with warnings.catch_warnings(record=True): - # We want to silence any warnings about, e.g. moved modules. - warnings.simplefilter("ignore", Warning) - return pickle.load(f) - except excs_to_catch: - # e.g. - # "No module named 'pandas.core.sparse.series'" - # "Can't get attribute '__nat_unpickle' on PwD7TRsEv2h?Ktee@rxtj}jd-wJa zQspX|v;t~k1mZV8_@U9n1S9GfW459J8?Eu1HYO^XXdpEYX6 ziP3#UVrvyxC$Z{9@oMLn|V&Pu?$ zDXz0}rkyhr)C9?M*7-jiO=6^|I9^*H@MqGxslCR_HOb-z6JL`Q1PB3c=Yd~EeyMwgCEb}A=6M=wq{M$qYbwD z7k-*N?>3ybM|Pk1#a(;!LgKA6pSj!S-yR$2o^T%{Soc!b<JE34+1007t#D! zQ*UtYXSapDa_Z&QbMDUB*V)O>$rEL==} z>FkD;ofl}YP-_GLagA#d_WaAY6JINZ-D Ni#f!xY|YW4{{SjH`hoxe literal 0 HcmV?d00001 diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 78b630bb5ada1..584a545769c4c 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -382,14 +382,23 @@ def test_read(self, protocol, get_random_path): tm.assert_frame_equal(df, df2) -def test_unicode_decode_error(datapath): +@pytest.mark.parametrize( + ["pickle_file", "excols"], + [ + ("test_py27.pkl", pd.Index(["a", "b", "c"])), + ( + "test_mi_py27.pkl", + pd.MultiIndex.from_arrays([["a", "b", "c"], ["A", "B", "C"]]), + ), + ], +) +def test_unicode_decode_error(datapath, pickle_file, excols): # pickle file written with py27, should be readable without raising - # UnicodeDecodeError, see GH#28645 - path = datapath("io", "data", "pickle", "test_py27.pkl") + # UnicodeDecodeError, see GH#28645 and GH#31988 + path = datapath("io", "data", "pickle", pickle_file) df = pd.read_pickle(path) # just test the columns are correct since the values are random - excols = pd.Index(["a", "b", "c"]) tm.assert_index_equal(df.columns, excols) From 4a05601f80dc25c11eecff36bf181c09dceb64ca Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 21 Feb 2020 10:34:10 -0800 Subject: [PATCH 090/388] CLN: make tm.N, tm.K private (#32138) --- doc/source/user_guide/reshaping.rst | 8 ++------ pandas/_testing.py | 28 ++++++++++++++-------------- 2 files changed, 16 insertions(+), 20 deletions(-) diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst index bbec9a770477d..58733b852e3a1 100644 --- a/doc/source/user_guide/reshaping.rst +++ b/doc/source/user_guide/reshaping.rst @@ -17,7 +17,6 @@ Reshaping by pivoting DataFrame objects :suppress: import pandas._testing as tm - tm.N = 3 def unpivot(frame): N, K = frame.shape @@ -27,7 +26,7 @@ Reshaping by pivoting DataFrame objects columns = ['date', 'variable', 'value'] return pd.DataFrame(data, columns=columns) - df = unpivot(tm.makeTimeDataFrame()) + df = unpivot(tm.makeTimeDataFrame(3)) Data is often stored in so-called "stacked" or "record" format: @@ -42,9 +41,6 @@ For the curious here is how the above ``DataFrame`` was created: import pandas._testing as tm - tm.N = 3 - - def unpivot(frame): N, K = frame.shape data = {'value': frame.to_numpy().ravel('F'), @@ -53,7 +49,7 @@ For the curious here is how the above ``DataFrame`` was 
created: return pd.DataFrame(data, columns=['date', 'variable', 'value']) - df = unpivot(tm.makeTimeDataFrame()) + df = unpivot(tm.makeTimeDataFrame(3)) To select out everything for variable ``A`` we could do: diff --git a/pandas/_testing.py b/pandas/_testing.py index c7c2e315812ac..a70f75d6cfaf4 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -69,8 +69,8 @@ lzma = _import_lzma() -N = 30 -K = 4 +_N = 30 +_K = 4 _RAISE_NETWORK_ERROR_DEFAULT = False # set testing_mode @@ -1790,45 +1790,45 @@ def all_timeseries_index_generator(k=10): # make series def makeFloatSeries(name=None): - index = makeStringIndex(N) - return Series(randn(N), index=index, name=name) + index = makeStringIndex(_N) + return Series(randn(_N), index=index, name=name) def makeStringSeries(name=None): - index = makeStringIndex(N) - return Series(randn(N), index=index, name=name) + index = makeStringIndex(_N) + return Series(randn(_N), index=index, name=name) def makeObjectSeries(name=None): - data = makeStringIndex(N) + data = makeStringIndex(_N) data = Index(data, dtype=object) - index = makeStringIndex(N) + index = makeStringIndex(_N) return Series(data, index=index, name=name) def getSeriesData(): - index = makeStringIndex(N) - return {c: Series(randn(N), index=index) for c in getCols(K)} + index = makeStringIndex(_N) + return {c: Series(randn(_N), index=index) for c in getCols(_K)} def makeTimeSeries(nper=None, freq="B", name=None): if nper is None: - nper = N + nper = _N return Series(randn(nper), index=makeDateIndex(nper, freq=freq), name=name) def makePeriodSeries(nper=None, name=None): if nper is None: - nper = N + nper = _N return Series(randn(nper), index=makePeriodIndex(nper), name=name) def getTimeSeriesData(nper=None, freq="B"): - return {c: makeTimeSeries(nper, freq) for c in getCols(K)} + return {c: makeTimeSeries(nper, freq) for c in getCols(_K)} def getPeriodData(nper=None): - return {c: makePeriodSeries(nper) for c in getCols(K)} + return {c: makePeriodSeries(nper) for c in getCols(_K)} # make frame From d171c871baa13bdfd2fe14c8a5918190314825bb Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 21 Feb 2020 18:35:14 +0000 Subject: [PATCH 091/388] TST: add test for get_loc on tz-aware DatetimeIndex (#32152) --- pandas/tests/indexes/datetimes/test_indexing.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index c358e72538788..ceab670fb5041 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -423,6 +423,17 @@ def test_get_loc(self): with pytest.raises(NotImplementedError): idx.get_loc(time(12, 30), method="pad") + def test_get_loc_tz_aware(self): + # https://github.com/pandas-dev/pandas/issues/32140 + dti = pd.date_range( + pd.Timestamp("2019-12-12 00:00:00", tz="US/Eastern"), + pd.Timestamp("2019-12-13 00:00:00", tz="US/Eastern"), + freq="5s", + ) + key = pd.Timestamp("2019-12-12 10:19:25", tz="US/Eastern") + result = dti.get_loc(key, method="nearest") + assert result == 7433 + def test_get_loc_nat(self): # GH#20464 index = DatetimeIndex(["1/3/2000", "NaT"]) From 9353ca736d512ac28fd788ffc60e9e70393e5f6d Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 21 Feb 2020 18:36:09 +0000 Subject: [PATCH 092/388] TST: add test for DataFrame.reindex on nearest tz-aware DatetimeIndex (#32155) --- pandas/tests/frame/indexing/test_indexing.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git 
a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index fcf0a41e0f74e..636cca0df9d4e 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1623,6 +1623,14 @@ def test_reindex_nearest_tz(self, tz_aware_fixture): actual = df.reindex(idx[:3], method="nearest") tm.assert_frame_equal(expected, actual) + def test_reindex_nearest_tz_empty_frame(self): + # https://github.com/pandas-dev/pandas/issues/31964 + dti = pd.DatetimeIndex(["2016-06-26 14:27:26+00:00"]) + df = pd.DataFrame(index=pd.DatetimeIndex(["2016-07-04 14:00:59+00:00"])) + expected = pd.DataFrame(index=dti) + result = df.reindex(dti, method="nearest") + tm.assert_frame_equal(result, expected) + def test_reindex_frame_add_nat(self): rng = date_range("1/1/2000 00:00:00", periods=10, freq="10s") df = DataFrame({"A": np.random.randn(len(rng)), "B": rng}) From c0066f32b4667744d7b86d680e7e3e5d9a08e33d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 21 Feb 2020 10:38:12 -0800 Subject: [PATCH 093/388] REG: dont call func on empty input (#32121) --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/core/groupby/groupby.py | 11 ++--------- pandas/tests/groupby/aggregate/test_aggregate.py | 12 ++++++++++++ 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index da79f651b63a9..910389f648b60 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -21,6 +21,7 @@ Fixed regressions - Fixed regression in :meth:`rolling(..).corr() ` when using a time offset (:issue:`31789`) - Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`). - Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) +- Fixed regression in :meth:`GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index cc46485b4a2e8..f946f0e63a583 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -923,17 +923,10 @@ def _python_agg_general(self, func, *args, **kwargs): try: # if this function is invalid for this dtype, we will ignore it. 
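For context, the regression fixed in this hunk meant a user-supplied aggregation
function was first probed with an empty slice before the real aggregation ran. A
condensed version of the test added later in this patch:

    import pandas as pd

    df = pd.DataFrame({"key": ["a", "b", "c", "c"], "value": [1, 2, 3, 4]})

    def dummy_func(x):
        assert len(x) != 0  # the old code called func(obj[:0]) first
        return x.sum()

    df.groupby("key")["value"].agg(dummy_func)
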
- func(obj[:0]) + result, counts = self.grouper.agg_series(obj, f) except TypeError: continue - except AssertionError: - raise - except Exception: - # Our function depends on having a non-empty argument - # See test_groupby_agg_err_catching - pass - - result, counts = self.grouper.agg_series(obj, f) + assert result is not None key = base.OutputKey(label=name, position=idx) output[key] = self._try_cast(result, obj, numeric_only=True) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index ff99081521ffb..48f8de7e51ae4 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -13,6 +13,18 @@ from pandas.core.groupby.grouper import Grouping +def test_groupby_agg_no_extra_calls(): + # GH#31760 + df = pd.DataFrame({"key": ["a", "b", "c", "c"], "value": [1, 2, 3, 4]}) + gb = df.groupby("key")["value"] + + def dummy_func(x): + assert len(x) != 0 + return x.sum() + + gb.agg(dummy_func) + + def test_agg_regression1(tsframe): grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) result = grouped.agg(np.mean) From 80d37adcc3d9bfbbe17e8aa626d6b5873465ca98 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 21 Feb 2020 19:06:45 +0000 Subject: [PATCH 094/388] DOC: move whatsnew to sync master with Backport PR #32148 (#32151) --- doc/source/whatsnew/v1.0.2.rst | 4 ++++ doc/source/whatsnew/v1.1.0.rst | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 910389f648b60..07afe60c9c22a 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -31,6 +31,10 @@ Fixed regressions Bug fixes ~~~~~~~~~ +**Datetimelike** + +- Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with a tz-aware index (:issue:`26683`) + **Categorical** - Fixed bug where :meth:`Categorical.from_codes` improperly raised a ``ValueError`` when passed nullable integer codes. 
(:issue:`31779`) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 13827e8fc4c33..0e2b15974dbbd 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -111,7 +111,6 @@ Datetimelike - Bug in :class:`Timestamp` where constructing :class:`Timestamp` from ambiguous epoch time and calling constructor again changed :meth:`Timestamp.value` property (:issue:`24329`) - :meth:`DatetimeArray.searchsorted`, :meth:`TimedeltaArray.searchsorted`, :meth:`PeriodArray.searchsorted` not recognizing non-pandas scalars and incorrectly raising ``ValueError`` instead of ``TypeError`` (:issue:`30950`) - Bug in :class:`Timestamp` where constructing :class:`Timestamp` with dateutil timezone less than 128 nanoseconds before daylight saving time switch from winter to summer would result in nonexistent time (:issue:`31043`) -- Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with a tz-aware index (:issue:`26683`) - Bug in :meth:`Period.to_timestamp`, :meth:`Period.start_time` with microsecond frequency returning a timestamp one nanosecond earlier than the correct time (:issue:`31475`) Timedelta From b9bcdc30765e88718c792b27ab9f3e27054f9fc7 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Sat, 22 Feb 2020 09:37:37 -0600 Subject: [PATCH 095/388] ENH: Enable indexing with nullable Boolean (#31591) --- doc/source/user_guide/boolean.rst | 10 +++---- doc/source/user_guide/indexing.rst | 12 ++++++-- doc/source/whatsnew/v1.0.2.rst | 29 +++++++++++++++++-- pandas/core/arrays/datetimelike.py | 4 ++- pandas/core/common.py | 7 +---- pandas/core/indexers.py | 14 ++++----- pandas/core/indexing.py | 7 +++-- .../tests/arrays/categorical/test_indexing.py | 9 ++++-- pandas/tests/extension/base/getitem.py | 20 +++++++------ pandas/tests/extension/base/setitem.py | 18 +++++------- pandas/tests/extension/test_numpy.py | 4 --- pandas/tests/indexing/test_check_indexer.py | 12 ++++---- pandas/tests/indexing/test_na_indexing.py | 27 ++++++++++++----- pandas/tests/series/indexing/test_boolean.py | 2 +- 14 files changed, 109 insertions(+), 66 deletions(-) diff --git a/doc/source/user_guide/boolean.rst b/doc/source/user_guide/boolean.rst index 4f0ad0e8ceaeb..6370a523b9a0d 100644 --- a/doc/source/user_guide/boolean.rst +++ b/doc/source/user_guide/boolean.rst @@ -20,8 +20,9 @@ Nullable Boolean data type Indexing with NA values ----------------------- -pandas does not allow indexing with NA values. Attempting to do so -will raise a ``ValueError``. +pandas allows indexing with ``NA`` values in a boolean array, which are treated as ``False``. + +.. versionchanged:: 1.0.2 .. ipython:: python :okexcept: @@ -30,12 +31,11 @@ will raise a ``ValueError``. mask = pd.array([True, False, pd.NA], dtype="boolean") s[mask] -The missing values will need to be explicitly filled with True or False prior -to using the array as a mask. +If you would prefer to keep the ``NA`` values you can manually fill them with ``fillna(True)``. .. ipython:: python - s[mask.fillna(False)] + s[mask.fillna(True)] .. _boolean.kleene: diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index a8cdf4a61073d..2bd3ff626f2e1 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -59,7 +59,7 @@ of multi-axis indexing. slices, **both** the start and the stop are included, when present in the index! See :ref:`Slicing with labels ` and :ref:`Endpoints are inclusive `.) 
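A compact sketch of the masking behavior the documentation lines below describe
(illustrative values):

    import pandas as pd

    s = pd.Series([1, 2, 3, 4])
    mask = pd.array([True, True, False, None], dtype="boolean")
    # as of 1.0.2, NA entries in a boolean mask select nothing (treated as False)
    print(s[mask].tolist())  # [1, 2]
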
- * A boolean array + * A boolean array (any ``NA`` values will be treated as ``False``). * A ``callable`` function with one argument (the calling Series or DataFrame) and that returns valid output for indexing (one of the above). @@ -75,7 +75,7 @@ of multi-axis indexing. * An integer e.g. ``5``. * A list or array of integers ``[4, 3, 0]``. * A slice object with ints ``1:7``. - * A boolean array. + * A boolean array (any ``NA`` values will be treated as ``False``). * A ``callable`` function with one argument (the calling Series or DataFrame) and that returns valid output for indexing (one of the above). @@ -374,6 +374,14 @@ For getting values with a boolean array: df1.loc['a'] > 0 df1.loc[:, df1.loc['a'] > 0] +NA values in a boolean array propogate as ``False``: + +.. versionchanged:: 1.0.2 + + mask = pd.array([True, False, True, False, pd.NA, False], dtype="boolean") + mask + df1[mask] + For getting a value explicitly: .. ipython:: python diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 07afe60c9c22a..affe019d0ac86 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -26,6 +26,33 @@ Fixed regressions .. --------------------------------------------------------------------------- +Indexing with Nullable Boolean Arrays +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Previously indexing with a nullable Boolean array containing ``NA`` would raise a ``ValueError``, however this is now permitted with ``NA`` being treated as ``False``. (:issue:`31503`) + +.. ipython:: python + + s = pd.Series([1, 2, 3, 4]) + mask = pd.array([True, True, False, None], dtype="boolean") + s + mask + +*pandas 1.0.0-1.0.1* + +.. code-block:: python + + >>> s[mask] + Traceback (most recent call last): + ... + ValueError: cannot mask with array containing NA / NaN values + +*pandas 1.0.2* + +.. ipython:: python + + s[mask] + .. _whatsnew_102.bug_fixes: Bug fixes @@ -45,8 +72,6 @@ Bug fixes - Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`) - Fixed bug in parquet roundtrip with nullable unsigned integer dtypes (:issue:`31896`). - - **Experimental dtypes** - Fix bug in :meth:`DataFrame.convert_dtypes` for columns that were already using the ``"string"`` dtype (:issue:`31731`). diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index e39d1dc03adf5..854075eaa8d09 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -520,7 +520,9 @@ def __getitem__(self, key): if com.is_bool_indexer(key): # first convert to boolean, because check_array_indexer doesn't # allow object dtype - key = np.asarray(key, dtype=bool) + if is_object_dtype(key): + key = np.asarray(key, dtype=bool) + key = check_array_indexer(self, key) if key.all(): key = slice(0, None, None) diff --git a/pandas/core/common.py b/pandas/core/common.py index 550ce74de5357..705c618fc49dc 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -118,7 +118,6 @@ def is_bool_indexer(key: Any) -> bool: check_array_indexer : Check that `key` is a valid array to index, and convert to an ndarray. 
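Echoing the doctest updated in this patch, the conversion performed for a nullable
boolean mask looks roughly like this:

    import numpy as np
    import pandas as pd
    from pandas.api.indexers import check_array_indexer

    arr = np.array([1, 2])
    mask = pd.array([True, pd.NA], dtype="boolean")
    # the nullable mask becomes a plain bool ndarray, with NA mapped to False
    print(check_array_indexer(arr, mask))  # [ True False]
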
""" - na_msg = "cannot mask with array containing NA / NaN values" if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or ( is_array_like(key) and is_extension_array_dtype(key.dtype) ): @@ -126,16 +125,12 @@ def is_bool_indexer(key: Any) -> bool: key = np.asarray(values_from_object(key)) if not lib.is_bool_array(key): + na_msg = "Cannot mask with non-boolean array containing NA / NaN values" if isna(key).any(): raise ValueError(na_msg) return False return True elif is_bool_dtype(key.dtype): - # an ndarray with bool-dtype by definition has no missing values. - # So we only need to check for NAs in ExtensionArrays - if is_extension_array_dtype(key.dtype): - if np.any(key.isna()): - raise ValueError(na_msg) return True elif isinstance(key, list): try: diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index cb48d4be75c4d..5e53b061dd1c8 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -10,6 +10,7 @@ from pandas.core.dtypes.common import ( is_array_like, is_bool_dtype, + is_extension_array_dtype, is_integer_dtype, is_list_like, ) @@ -366,14 +367,11 @@ def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any: ... IndexError: Boolean index has wrong length: 3 instead of 2. - A ValueError is raised when the mask cannot be converted to - a bool-dtype ndarray. + NA values in a boolean array are treated as False. >>> mask = pd.array([True, pd.NA]) >>> pd.api.indexers.check_array_indexer(arr, mask) - Traceback (most recent call last): - ... - ValueError: Cannot mask with a boolean indexer containing NA values + array([ True, False]) A numpy boolean mask will get passed through (if the length is correct): @@ -425,10 +423,10 @@ def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any: dtype = indexer.dtype if is_bool_dtype(dtype): - try: + if is_extension_array_dtype(dtype): + indexer = indexer.to_numpy(dtype=bool, na_value=False) + else: indexer = np.asarray(indexer, dtype=bool) - except ValueError: - raise ValueError("Cannot mask with a boolean indexer containing NA values") # GH26658 if len(indexer) != len(array): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 081f87078d9c9..5ae237eb7dc32 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -13,6 +13,7 @@ is_iterator, is_list_like, is_numeric_dtype, + is_object_dtype, is_scalar, is_sequence, ) @@ -2189,10 +2190,12 @@ def check_bool_indexer(index: Index, key) -> np.ndarray: "the indexed object do not match)." 
) result = result.astype(bool)._values - else: - # key might be sparse / object-dtype bool, check_array_indexer needs bool array + elif is_object_dtype(key): + # key might be object-dtype bool, check_array_indexer needs bool array result = np.asarray(result, dtype=bool) result = check_array_indexer(index, result) + else: + result = check_array_indexer(index, result) return result diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index 85d5a6a3dc3ac..3d9469c252914 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -240,14 +240,17 @@ def test_mask_with_boolean(index): @pytest.mark.parametrize("index", [True, False]) -def test_mask_with_boolean_raises(index): +def test_mask_with_boolean_na_treated_as_false(index): + # https://github.com/pandas-dev/pandas/issues/31503 s = Series(range(3)) idx = Categorical([True, False, None]) if index: idx = CategoricalIndex(idx) - with pytest.raises(ValueError, match="NA / NaN"): - s[idx] + result = s[idx] + expected = s[idx.fillna(False)] + + tm.assert_series_equal(result, expected) @pytest.fixture diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 8615a8df22dcc..b08a64cc076b6 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -158,21 +158,23 @@ def test_getitem_boolean_array_mask(self, data): result = pd.Series(data)[mask] self.assert_series_equal(result, expected) - def test_getitem_boolean_array_mask_raises(self, data): + def test_getitem_boolean_na_treated_as_false(self, data): + # https://github.com/pandas-dev/pandas/issues/31503 mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") mask[:2] = pd.NA + mask[2:4] = True - msg = ( - "Cannot mask with a boolean indexer containing NA values|" - "cannot mask with array containing NA / NaN values" - ) - with pytest.raises(ValueError, match=msg): - data[mask] + result = data[mask] + expected = data[mask.fillna(False)] + + self.assert_extension_array_equal(result, expected) s = pd.Series(data) - with pytest.raises(ValueError): - s[mask] + result = s[mask] + expected = s[mask.fillna(False)] + + self.assert_series_equal(result, expected) @pytest.mark.parametrize( "idx", diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index af70799c0236e..a4fe89df158fa 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -98,8 +98,9 @@ def test_setitem_iloc_scalar_multiple_homogoneous(self, data): [ np.array([True, True, True, False, False]), pd.array([True, True, True, False, False], dtype="boolean"), + pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"), ], - ids=["numpy-array", "boolean-array"], + ids=["numpy-array", "boolean-array", "boolean-array-na"], ) def test_setitem_mask(self, data, mask, box_in_series): arr = data[:5].copy() @@ -124,20 +125,17 @@ def test_setitem_mask_raises(self, data, box_in_series): with pytest.raises(IndexError, match="wrong length"): data[mask] = data[0] - def test_setitem_mask_boolean_array_raises(self, data, box_in_series): - # missing values in mask + def test_setitem_mask_boolean_array_with_na(self, data, box_in_series): mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") - mask[:2] = pd.NA + mask[:3] = True + mask[3:5] = pd.NA if box_in_series: data = pd.Series(data) - msg = ( - "Cannot mask with a boolean indexer 
containing NA values|" - "cannot mask with array containing NA / NaN values" - ) - with pytest.raises(ValueError, match=msg): - data[mask] = data[0] + data[mask] = data[0] + + assert (data[:3] == data[0]).all() @pytest.mark.parametrize( "idx", diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 80a093530a8cd..61c5925383f88 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -415,10 +415,6 @@ def test_setitem_mask(self, data, mask, box_in_series): def test_setitem_mask_raises(self, data, box_in_series): super().test_setitem_mask_raises(data, box_in_series) - @skip_nested - def test_setitem_mask_boolean_array_raises(self, data, box_in_series): - super().test_setitem_mask_boolean_array_raises(data, box_in_series) - @skip_nested @pytest.mark.parametrize( "idx", diff --git a/pandas/tests/indexing/test_check_indexer.py b/pandas/tests/indexing/test_check_indexer.py index 82f8c12229824..69d4065234d93 100644 --- a/pandas/tests/indexing/test_check_indexer.py +++ b/pandas/tests/indexing/test_check_indexer.py @@ -34,12 +34,14 @@ def test_valid_input(indexer, expected): @pytest.mark.parametrize( "indexer", [[True, False, None], pd.array([True, False, None], dtype="boolean")], ) -def test_bool_raise_missing_values(indexer): - array = np.array([1, 2, 3]) +def test_boolean_na_returns_indexer(indexer): + # https://github.com/pandas-dev/pandas/issues/31503 + arr = np.array([1, 2, 3]) - msg = "Cannot mask with a boolean indexer containing NA values" - with pytest.raises(ValueError, match=msg): - check_array_indexer(array, indexer) + result = check_array_indexer(arr, indexer) + expected = np.array([True, False, False], dtype=bool) + + tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/indexing/test_na_indexing.py b/pandas/tests/indexing/test_na_indexing.py index befe4fee8ecf8..345ca30ec77eb 100644 --- a/pandas/tests/indexing/test_na_indexing.py +++ b/pandas/tests/indexing/test_na_indexing.py @@ -62,18 +62,29 @@ def test_series_mask_boolean(values, dtype, mask, box_mask, frame): @pytest.mark.parametrize("frame", [True, False]) -def test_indexing_with_na_raises(frame): +def test_na_treated_as_false(frame): + # https://github.com/pandas-dev/pandas/issues/31503 s = pd.Series([1, 2, 3], name="name") if frame: s = s.to_frame() + mask = pd.array([True, False, None], dtype="boolean") - match = "cannot mask with array containing NA / NaN values" - with pytest.raises(ValueError, match=match): - s[mask] - with pytest.raises(ValueError, match=match): - s.loc[mask] + result = s[mask] + expected = s[mask.fillna(False)] + + result_loc = s.loc[mask] + expected_loc = s.loc[mask.fillna(False)] - with pytest.raises(ValueError, match=match): - s.iloc[mask] + result_iloc = s.iloc[mask] + expected_iloc = s.iloc[mask.fillna(False)] + + if frame: + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result_loc, expected_loc) + tm.assert_frame_equal(result_iloc, expected_iloc) + else: + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result_loc, expected_loc) + tm.assert_series_equal(result_iloc, expected_iloc) diff --git a/pandas/tests/series/indexing/test_boolean.py b/pandas/tests/series/indexing/test_boolean.py index 28f3c0f7429f8..8878a4a6526af 100644 --- a/pandas/tests/series/indexing/test_boolean.py +++ b/pandas/tests/series/indexing/test_boolean.py @@ -72,7 +72,7 @@ def test_getitem_boolean_object(string_series): # nans raise exception omask[5:10] = 
np.nan - msg = "cannot mask with array containing NA / NaN values" + msg = "Cannot mask with non-boolean array containing NA / NaN values" with pytest.raises(ValueError, match=msg): s[omask] with pytest.raises(ValueError, match=msg): From 9f7cd99052c98b4e6d8eb8b9210fc3d2dac0745d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 07:40:54 -0800 Subject: [PATCH 096/388] REF/TST: collect Index join tests (#32171) --- .../indexes/datetimes/test_date_range.py | 11 - .../tests/indexes/datetimes/test_datetime.py | 38 +- pandas/tests/indexes/datetimes/test_join.py | 131 ++++++ pandas/tests/indexes/datetimes/test_setops.py | 84 ---- .../tests/indexes/datetimes/test_timezones.py | 14 - pandas/tests/indexes/numeric/__init__.py | 0 pandas/tests/indexes/numeric/test_join.py | 388 ++++++++++++++++++ pandas/tests/indexes/period/test_join.py | 43 ++ pandas/tests/indexes/period/test_period.py | 5 - pandas/tests/indexes/period/test_setops.py | 34 +- pandas/tests/indexes/ranges/test_join.py | 174 ++++++++ pandas/tests/indexes/ranges/test_range.py | 168 -------- pandas/tests/indexes/test_base.py | 4 +- pandas/tests/indexes/test_numeric.py | 378 ----------------- pandas/tests/indexes/timedeltas/test_join.py | 37 ++ .../indexes/timedeltas/test_timedelta.py | 31 -- 16 files changed, 778 insertions(+), 762 deletions(-) create mode 100644 pandas/tests/indexes/datetimes/test_join.py create mode 100644 pandas/tests/indexes/numeric/__init__.py create mode 100644 pandas/tests/indexes/numeric/test_join.py create mode 100644 pandas/tests/indexes/period/test_join.py create mode 100644 pandas/tests/indexes/ranges/test_join.py create mode 100644 pandas/tests/indexes/timedeltas/test_join.py diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 4d0beecbbf5d3..d33351fe94a8c 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -759,17 +759,6 @@ def test_constructor(self): with pytest.raises(TypeError, match=msg): bdate_range(START, END, periods=10, freq=None) - def test_naive_aware_conflicts(self): - naive = bdate_range(START, END, freq=BDay(), tz=None) - aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong") - - msg = "tz-naive.*tz-aware" - with pytest.raises(TypeError, match=msg): - naive.join(aware) - - with pytest.raises(TypeError, match=msg): - aware.join(naive) - def test_misc(self): end = datetime(2009, 5, 13) dr = bdate_range(end=end, periods=20) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index ca18d6fbea11a..1a72ef2bdf1aa 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -100,16 +100,13 @@ def test_stringified_slice_with_tz(self): df = DataFrame(np.arange(10), index=idx) df["2013-01-14 23:44:34.437768-05:00":] # no exception here - def test_append_join_nondatetimeindex(self): + def test_append_nondatetimeindex(self): rng = date_range("1/1/2000", periods=10) idx = Index(["a", "b", "c", "d"]) result = rng.append(idx) assert isinstance(result[0], Timestamp) - # it works - rng.join(idx, how="outer") - def test_map(self): rng = date_range("1/1/2000", periods=10) @@ -246,25 +243,6 @@ def test_isin(self): index.isin([index[2], 5]), np.array([False, False, True, False]) ) - def test_does_not_convert_mixed_integer(self): - df = tm.makeCustomDataframe( - 10, - 10, - data_gen_f=lambda *args, **kwargs: randn(), - r_idx_type="i", - 
c_idx_type="dt", - ) - cols = df.columns.join(df.index, how="outer") - joined = cols.join(df.columns) - assert cols.dtype == np.dtype("O") - assert cols.dtype == joined.dtype - tm.assert_numpy_array_equal(cols.values, joined.values) - - def test_join_self(self, join_type): - index = date_range("1/1/2000", periods=10) - joined = index.join(index, how=join_type) - assert index is joined - def assert_index_parameters(self, index): assert index.freq == "40960N" assert index.inferred_freq == "40960N" @@ -282,20 +260,6 @@ def test_ns_index(self): new_index = pd.date_range(start=index[0], end=index[-1], freq=index.freq) self.assert_index_parameters(new_index) - def test_join_with_period_index(self, join_type): - df = tm.makeCustomDataframe( - 10, - 10, - data_gen_f=lambda *args: np.random.randint(2), - c_idx_type="p", - r_idx_type="dt", - ) - s = df.iloc[:5, 0] - - expected = df.columns.astype("O").join(s.index, how=join_type) - result = df.columns.join(s.index, how=join_type) - tm.assert_index_equal(expected, result) - def test_factorize(self): idx1 = DatetimeIndex( ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"] diff --git a/pandas/tests/indexes/datetimes/test_join.py b/pandas/tests/indexes/datetimes/test_join.py new file mode 100644 index 0000000000000..e4d6958dbd3d8 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_join.py @@ -0,0 +1,131 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import DatetimeIndex, Index, Timestamp, date_range, to_datetime +import pandas._testing as tm + +from pandas.tseries.offsets import BDay, BMonthEnd + + +class TestJoin: + def test_does_not_convert_mixed_integer(self): + df = tm.makeCustomDataframe( + 10, + 10, + data_gen_f=lambda *args, **kwargs: np.random.randn(), + r_idx_type="i", + c_idx_type="dt", + ) + cols = df.columns.join(df.index, how="outer") + joined = cols.join(df.columns) + assert cols.dtype == np.dtype("O") + assert cols.dtype == joined.dtype + tm.assert_numpy_array_equal(cols.values, joined.values) + + def test_join_self(self, join_type): + index = date_range("1/1/2000", periods=10) + joined = index.join(index, how=join_type) + assert index is joined + + def test_join_with_period_index(self, join_type): + df = tm.makeCustomDataframe( + 10, + 10, + data_gen_f=lambda *args: np.random.randint(2), + c_idx_type="p", + r_idx_type="dt", + ) + s = df.iloc[:5, 0] + + expected = df.columns.astype("O").join(s.index, how=join_type) + result = df.columns.join(s.index, how=join_type) + tm.assert_index_equal(expected, result) + + def test_join_object_index(self): + rng = date_range("1/1/2000", periods=10) + idx = Index(["a", "b", "c", "d"]) + + result = rng.join(idx, how="outer") + assert isinstance(result[0], Timestamp) + + def test_join_utc_convert(self, join_type): + rng = date_range("1/1/2011", periods=100, freq="H", tz="utc") + + left = rng.tz_convert("US/Eastern") + right = rng.tz_convert("Europe/Berlin") + + result = left.join(left[:-5], how=join_type) + assert isinstance(result, DatetimeIndex) + assert result.tz == left.tz + + result = left.join(right[:-5], how=join_type) + assert isinstance(result, DatetimeIndex) + assert result.tz.zone == "UTC" + + @pytest.mark.parametrize("sort", [None, False]) + def test_datetimeindex_union_join_empty(self, sort): + dti = date_range(start="1/1/2001", end="2/1/2001", freq="D") + empty = Index([]) + + result = dti.union(empty, sort=sort) + expected = dti.astype("O") + tm.assert_index_equal(result, expected) + + result = dti.join(empty) + assert 
isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, dti) + + def test_join_nonunique(self): + idx1 = to_datetime(["2012-11-06 16:00:11.477563", "2012-11-06 16:00:11.477563"]) + idx2 = to_datetime(["2012-11-06 15:11:09.006507", "2012-11-06 15:11:09.006507"]) + rs = idx1.join(idx2, how="outer") + assert rs.is_monotonic + + @pytest.mark.parametrize("freq", ["B", "C"]) + def test_outer_join(self, freq): + # should just behave as union + start, end = datetime(2009, 1, 1), datetime(2010, 1, 1) + rng = date_range(start=start, end=end, freq=freq) + + # overlapping + left = rng[:10] + right = rng[5:10] + + the_join = left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + + # non-overlapping, gap in middle + left = rng[:5] + right = rng[10:] + + the_join = left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + assert the_join.freq is None + + # non-overlapping, no gap + left = rng[:5] + right = rng[5:10] + + the_join = left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + + # overlapping, but different offset + other = date_range(start, end, freq=BMonthEnd()) + + the_join = rng.join(other, how="outer") + assert isinstance(the_join, DatetimeIndex) + assert the_join.freq is None + + def test_naive_aware_conflicts(self): + start, end = datetime(2009, 1, 1), datetime(2010, 1, 1) + naive = date_range(start, end, freq=BDay(), tz=None) + aware = date_range(start, end, freq=BDay(), tz="Asia/Hong_Kong") + + msg = "tz-naive.*tz-aware" + with pytest.raises(TypeError, match=msg): + naive.join(aware) + + with pytest.raises(TypeError, match=msg): + aware.join(naive) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 78188c54b1d85..d58ecbad4c1b3 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -14,7 +14,6 @@ Series, bdate_range, date_range, - to_datetime, ) import pandas._testing as tm @@ -348,25 +347,6 @@ def test_datetimeindex_diff(self, sort): dti2 = date_range(freq="Q-JAN", start=datetime(1997, 12, 31), periods=98) assert len(dti1.difference(dti2, sort)) == 2 - @pytest.mark.parametrize("sort", [None, False]) - def test_datetimeindex_union_join_empty(self, sort): - dti = date_range(start="1/1/2001", end="2/1/2001", freq="D") - empty = Index([]) - - result = dti.union(empty, sort=sort) - expected = dti.astype("O") - tm.assert_index_equal(result, expected) - - result = dti.join(empty) - assert isinstance(result, DatetimeIndex) - tm.assert_index_equal(result, dti) - - def test_join_nonunique(self): - idx1 = to_datetime(["2012-11-06 16:00:11.477563", "2012-11-06 16:00:11.477563"]) - idx2 = to_datetime(["2012-11-06 15:11:09.006507", "2012-11-06 15:11:09.006507"]) - rs = idx1.join(idx2, how="outer") - assert rs.is_monotonic - class TestBusinessDatetimeIndex: def setup_method(self, method): @@ -408,38 +388,6 @@ def test_union(self, sort): the_union = self.rng.union(rng, sort=sort) assert isinstance(the_union, DatetimeIndex) - def test_outer_join(self): - # should just behave as union - - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_join = left.join(right, how="outer") - assert isinstance(the_join, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_join = left.join(right, how="outer") - assert isinstance(the_join, DatetimeIndex) - assert the_join.freq is None - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - 
- the_join = left.join(right, how="outer") - assert isinstance(the_join, DatetimeIndex) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_join = self.rng.join(rng, how="outer") - assert isinstance(the_join, DatetimeIndex) - assert the_join.freq is None - @pytest.mark.parametrize("sort", [None, False]) def test_union_not_cacheable(self, sort): rng = date_range("1/1/2000", periods=50, freq=Minute()) @@ -556,38 +504,6 @@ def test_union(self, sort): the_union = self.rng.union(rng, sort=sort) assert isinstance(the_union, DatetimeIndex) - def test_outer_join(self): - # should just behave as union - - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_join = left.join(right, how="outer") - assert isinstance(the_join, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_join = left.join(right, how="outer") - assert isinstance(the_join, DatetimeIndex) - assert the_join.freq is None - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_join = left.join(right, how="outer") - assert isinstance(the_join, DatetimeIndex) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_join = self.rng.join(rng, how="outer") - assert isinstance(the_join, DatetimeIndex) - assert the_join.freq is None - def test_intersection_bug(self): # GH #771 a = bdate_range("11/30/2011", "12/31/2011", freq="C") diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 7574e4501f5aa..9c1e8cb0f563f 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -804,20 +804,6 @@ def test_dti_tz_constructors(self, tzstr): # ------------------------------------------------------------- # Unsorted - def test_join_utc_convert(self, join_type): - rng = date_range("1/1/2011", periods=100, freq="H", tz="utc") - - left = rng.tz_convert("US/Eastern") - right = rng.tz_convert("Europe/Berlin") - - result = left.join(left[:-5], how=join_type) - assert isinstance(result, DatetimeIndex) - assert result.tz == left.tz - - result = left.join(right[:-5], how=join_type) - assert isinstance(result, DatetimeIndex) - assert result.tz.zone == "UTC" - @pytest.mark.parametrize( "dtype", [None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"], diff --git a/pandas/tests/indexes/numeric/__init__.py b/pandas/tests/indexes/numeric/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/indexes/numeric/test_join.py b/pandas/tests/indexes/numeric/test_join.py new file mode 100644 index 0000000000000..c8dffa411e5fd --- /dev/null +++ b/pandas/tests/indexes/numeric/test_join.py @@ -0,0 +1,388 @@ +import numpy as np +import pytest + +from pandas import Index, Int64Index, UInt64Index +import pandas._testing as tm + + +class TestJoinInt64Index: + def test_join_non_unique(self): + left = Index([4, 4, 3, 3]) + + joined, lidx, ridx = left.join(left, return_indexers=True) + + exp_joined = Index([3, 3, 3, 3, 4, 4, 4, 4]) + tm.assert_index_equal(joined, exp_joined) + + exp_lidx = np.array([2, 2, 3, 3, 0, 0, 1, 1], dtype=np.intp) + tm.assert_numpy_array_equal(lidx, exp_lidx) + + exp_ridx = np.array([2, 3, 2, 3, 0, 1, 0, 1], dtype=np.intp) + tm.assert_numpy_array_equal(ridx, exp_ridx) + + def test_join_inner(self): + index = Int64Index(range(0, 20, 2)) + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = 
Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + res, lidx, ridx = index.join(other, how="inner", return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = Int64Index([2, 12]) + elidx = np.array([1, 6], dtype=np.intp) + eridx = np.array([4, 1], dtype=np.intp) + + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index.join(other_mono, how="inner", return_indexers=True) + + res2 = index.intersection(other_mono) + tm.assert_index_equal(res, res2) + + elidx = np.array([1, 6], dtype=np.intp) + eridx = np.array([1, 4], dtype=np.intp) + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_left(self): + index = Int64Index(range(0, 20, 2)) + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + res, lidx, ridx = index.join(other, how="left", return_indexers=True) + eres = index + eridx = np.array([-1, 4, -1, -1, -1, -1, 1, -1, -1, -1], dtype=np.intp) + + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index.join(other_mono, how="left", return_indexers=True) + eridx = np.array([-1, 1, -1, -1, -1, -1, 4, -1, -1, -1], dtype=np.intp) + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # non-unique + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) + res, lidx, ridx = idx2.join(idx, how="left", return_indexers=True) + eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 + eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_right(self): + index = Int64Index(range(0, 20, 2)) + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + res, lidx, ridx = index.join(other, how="right", return_indexers=True) + eres = other + elidx = np.array([-1, 6, -1, -1, 1, -1], dtype=np.intp) + + assert isinstance(other, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + assert ridx is None + + # monotonic + res, lidx, ridx = index.join(other_mono, how="right", return_indexers=True) + eres = other_mono + elidx = np.array([-1, 1, -1, -1, 6, -1], dtype=np.intp) + assert isinstance(other, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + assert ridx is None + + # non-unique + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) + res, lidx, ridx = idx.join(idx2, how="right", return_indexers=True) + eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 + elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_non_int_index(self): + index = Int64Index(range(0, 20, 2)) + other = Index([3, 6, 7, 8, 10], dtype=object) 
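+        # "outer"/"inner" joins are expected to be symmetric (same labels no
+        # matter which side calls join), while "left"/"right" keep the labels
+        # of the named side, upcast to object when the other side is object.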
+ + outer = index.join(other, how="outer") + outer2 = other.join(index, how="outer") + expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18]) + tm.assert_index_equal(outer, outer2) + tm.assert_index_equal(outer, expected) + + inner = index.join(other, how="inner") + inner2 = other.join(index, how="inner") + expected = Index([6, 8, 10]) + tm.assert_index_equal(inner, inner2) + tm.assert_index_equal(inner, expected) + + left = index.join(other, how="left") + tm.assert_index_equal(left, index.astype(object)) + + left2 = other.join(index, how="left") + tm.assert_index_equal(left2, other) + + right = index.join(other, how="right") + tm.assert_index_equal(right, other) + + right2 = other.join(index, how="right") + tm.assert_index_equal(right2, index.astype(object)) + + def test_join_outer(self): + index = Int64Index(range(0, 20, 2)) + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + # guarantee of sortedness + res, lidx, ridx = index.join(other, how="outer", return_indexers=True) + noidx_res = index.join(other, how="outer") + tm.assert_index_equal(res, noidx_res) + + eres = Int64Index([0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 25]) + elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], dtype=np.intp) + eridx = np.array( + [-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], dtype=np.intp + ) + + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index.join(other_mono, how="outer", return_indexers=True) + noidx_res = index.join(other_mono, how="outer") + tm.assert_index_equal(res, noidx_res) + + elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], dtype=np.intp) + eridx = np.array( + [-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], dtype=np.intp + ) + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + +class TestJoinUInt64Index: + @pytest.fixture + def index_large(self): + # large values used in TestUInt64Index where no compat needed with Int64/Float64 + large = [2 ** 63, 2 ** 63 + 10, 2 ** 63 + 15, 2 ** 63 + 20, 2 ** 63 + 25] + return UInt64Index(large) + + def test_join_inner(self, index_large): + other = UInt64Index(2 ** 63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = UInt64Index( + 2 ** 63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") + ) + + # not monotonic + res, lidx, ridx = index_large.join(other, how="inner", return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = UInt64Index(2 ** 63 + np.array([10, 25], dtype="uint64")) + elidx = np.array([1, 4], dtype=np.intp) + eridx = np.array([5, 2], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index_large.join( + other_mono, how="inner", return_indexers=True + ) + + res2 = index_large.intersection(other_mono) + tm.assert_index_equal(res, res2) + + elidx = np.array([1, 4], dtype=np.intp) + eridx = np.array([3, 5], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + 
def test_join_left(self, index_large): + other = UInt64Index(2 ** 63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = UInt64Index( + 2 ** 63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") + ) + + # not monotonic + res, lidx, ridx = index_large.join(other, how="left", return_indexers=True) + eres = index_large + eridx = np.array([-1, 5, -1, -1, 2], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index_large.join(other_mono, how="left", return_indexers=True) + eridx = np.array([-1, 3, -1, -1, 5], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # non-unique + idx = UInt64Index(2 ** 63 + np.array([1, 1, 2, 5], dtype="uint64")) + idx2 = UInt64Index(2 ** 63 + np.array([1, 2, 5, 7, 9], dtype="uint64")) + res, lidx, ridx = idx2.join(idx, how="left", return_indexers=True) + + # 1 is in idx2, so it should be x2 + eres = UInt64Index(2 ** 63 + np.array([1, 1, 2, 5, 7, 9], dtype="uint64")) + eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_right(self, index_large): + other = UInt64Index(2 ** 63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = UInt64Index( + 2 ** 63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") + ) + + # not monotonic + res, lidx, ridx = index_large.join(other, how="right", return_indexers=True) + eres = other + elidx = np.array([-1, -1, 4, -1, -1, 1], dtype=np.intp) + + tm.assert_numpy_array_equal(lidx, elidx) + assert isinstance(other, UInt64Index) + tm.assert_index_equal(res, eres) + assert ridx is None + + # monotonic + res, lidx, ridx = index_large.join( + other_mono, how="right", return_indexers=True + ) + eres = other_mono + elidx = np.array([-1, -1, -1, 1, -1, 4], dtype=np.intp) + + assert isinstance(other, UInt64Index) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_index_equal(res, eres) + assert ridx is None + + # non-unique + idx = UInt64Index(2 ** 63 + np.array([1, 1, 2, 5], dtype="uint64")) + idx2 = UInt64Index(2 ** 63 + np.array([1, 2, 5, 7, 9], dtype="uint64")) + res, lidx, ridx = idx.join(idx2, how="right", return_indexers=True) + + # 1 is in idx2, so it should be x2 + eres = UInt64Index(2 ** 63 + np.array([1, 1, 2, 5, 7, 9], dtype="uint64")) + elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_non_int_index(self, index_large): + other = Index( + 2 ** 63 + np.array([1, 5, 7, 10, 20], dtype="uint64"), dtype=object + ) + + outer = index_large.join(other, how="outer") + outer2 = other.join(index_large, how="outer") + expected = Index( + 2 ** 63 + np.array([0, 1, 5, 7, 10, 15, 20, 25], dtype="uint64") + ) + tm.assert_index_equal(outer, outer2) + tm.assert_index_equal(outer, expected) + + inner = index_large.join(other, how="inner") + inner2 = other.join(index_large, how="inner") + expected = Index(2 ** 63 + np.array([10, 20], dtype="uint64")) + tm.assert_index_equal(inner, inner2) + tm.assert_index_equal(inner, expected) + + left = index_large.join(other, how="left") + 
tm.assert_index_equal(left, index_large.astype(object)) + + left2 = other.join(index_large, how="left") + tm.assert_index_equal(left2, other) + + right = index_large.join(other, how="right") + tm.assert_index_equal(right, other) + + right2 = other.join(index_large, how="right") + tm.assert_index_equal(right2, index_large.astype(object)) + + def test_join_outer(self, index_large): + other = UInt64Index(2 ** 63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = UInt64Index( + 2 ** 63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") + ) + + # not monotonic + # guarantee of sortedness + res, lidx, ridx = index_large.join(other, how="outer", return_indexers=True) + noidx_res = index_large.join(other, how="outer") + tm.assert_index_equal(res, noidx_res) + + eres = UInt64Index( + 2 ** 63 + np.array([0, 1, 2, 7, 10, 12, 15, 20, 25], dtype="uint64") + ) + elidx = np.array([0, -1, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp) + eridx = np.array([-1, 3, 4, 0, 5, 1, -1, -1, 2], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index_large.join( + other_mono, how="outer", return_indexers=True + ) + noidx_res = index_large.join(other_mono, how="outer") + tm.assert_index_equal(res, noidx_res) + + elidx = np.array([0, -1, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp) + eridx = np.array([-1, 0, 1, 2, 3, 4, -1, -1, 5], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) diff --git a/pandas/tests/indexes/period/test_join.py b/pandas/tests/indexes/period/test_join.py new file mode 100644 index 0000000000000..9e3df0c32d6d5 --- /dev/null +++ b/pandas/tests/indexes/period/test_join.py @@ -0,0 +1,43 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs import IncompatibleFrequency + +from pandas import Index, PeriodIndex, period_range +import pandas._testing as tm + + +class TestJoin: + def test_joins(self, join_type): + index = period_range("1/1/2000", "1/20/2000", freq="D") + + joined = index.join(index[:-5], how=join_type) + + assert isinstance(joined, PeriodIndex) + assert joined.freq == index.freq + + def test_join_self(self, join_type): + index = period_range("1/1/2000", "1/20/2000", freq="D") + + res = index.join(index, how=join_type) + assert index is res + + def test_join_does_not_recur(self): + df = tm.makeCustomDataframe( + 3, + 2, + data_gen_f=lambda *args: np.random.randint(2), + c_idx_type="p", + r_idx_type="dt", + ) + s = df.iloc[:2, 0] + + res = s.index.join(df.columns, how="outer") + expected = Index([s.index[0], s.index[1], df.columns[0], df.columns[1]], object) + tm.assert_index_equal(res, expected) + + def test_join_mismatched_freq_raises(self): + index = period_range("1/1/2000", "1/20/2000", freq="D") + index3 = period_range("1/1/2000", "1/20/2000", freq="2D") + with pytest.raises(IncompatibleFrequency): + index.join(index3) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 4db93e850f579..6479b14e9521e 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -586,11 +586,6 @@ def test_map(self): exp = Index([x.ordinal for x in index]) tm.assert_index_equal(result, exp) - def test_join_self(self, join_type): - index = period_range("1/1/2000", periods=10) - joined = index.join(index, 
how=join_type) - assert index is joined - def test_insert(self): # GH 18295 (test missing) expected = PeriodIndex(["2017Q1", NaT, "2017Q2", "2017Q3", "2017Q4"], freq="Q") diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index 6f254b7b4408d..647d56d33f312 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -4,7 +4,7 @@ from pandas._libs.tslibs import IncompatibleFrequency import pandas as pd -from pandas import Index, PeriodIndex, date_range, period_range +from pandas import PeriodIndex, date_range, period_range import pandas._testing as tm @@ -13,34 +13,6 @@ def _permute(obj): class TestPeriodIndex: - def test_joins(self, join_type): - index = period_range("1/1/2000", "1/20/2000", freq="D") - - joined = index.join(index[:-5], how=join_type) - - assert isinstance(joined, PeriodIndex) - assert joined.freq == index.freq - - def test_join_self(self, join_type): - index = period_range("1/1/2000", "1/20/2000", freq="D") - - res = index.join(index, how=join_type) - assert index is res - - def test_join_does_not_recur(self): - df = tm.makeCustomDataframe( - 3, - 2, - data_gen_f=lambda *args: np.random.randint(2), - c_idx_type="p", - r_idx_type="dt", - ) - s = df.iloc[:2, 0] - - res = s.index.join(df.columns, how="outer") - expected = Index([s.index[0], s.index[1], df.columns[0], df.columns[1]], object) - tm.assert_index_equal(res, expected) - @pytest.mark.parametrize("sort", [None, False]) def test_union(self, sort): # union @@ -181,10 +153,6 @@ def test_union_misc(self, sort): with pytest.raises(IncompatibleFrequency): index.union(index2, sort=sort) - index3 = period_range("1/1/2000", "1/20/2000", freq="2D") - with pytest.raises(IncompatibleFrequency): - index.join(index3) - # TODO: belongs elsewhere def test_union_dataframe_index(self): rng1 = period_range("1/1/1999", "1/1/2012", freq="M") diff --git a/pandas/tests/indexes/ranges/test_join.py b/pandas/tests/indexes/ranges/test_join.py new file mode 100644 index 0000000000000..76013d2b7a387 --- /dev/null +++ b/pandas/tests/indexes/ranges/test_join.py @@ -0,0 +1,174 @@ +import numpy as np + +from pandas import Index, Int64Index, RangeIndex +import pandas._testing as tm + + +class TestJoin: + def test_join_outer(self): + # join with Int64Index + index = RangeIndex(start=0, stop=20, step=2) + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = index.join(other, how="outer", return_indexers=True) + noidx_res = index.join(other, how="outer") + tm.assert_index_equal(res, noidx_res) + + eres = Int64Index( + [0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] + ) + elidx = np.array( + [0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9, -1, -1, -1, -1, -1, -1, -1], + dtype=np.intp, + ) + eridx = np.array( + [-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0], + dtype=np.intp, + ) + + assert isinstance(res, Int64Index) + assert not isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # join with RangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = index.join(other, how="outer", return_indexers=True) + noidx_res = index.join(other, how="outer") + tm.assert_index_equal(res, noidx_res) + + assert isinstance(res, Int64Index) + assert not isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def 
test_join_inner(self):
+        # Join with non-RangeIndex
+        index = RangeIndex(start=0, stop=20, step=2)
+        other = Int64Index(np.arange(25, 14, -1))
+
+        res, lidx, ridx = index.join(other, how="inner", return_indexers=True)
+
+        # no guarantee of sortedness, so sort for comparison purposes
+        ind = res.argsort()
+        res = res.take(ind)
+        lidx = lidx.take(ind)
+        ridx = ridx.take(ind)
+
+        eres = Int64Index([16, 18])
+        elidx = np.array([8, 9], dtype=np.intp)
+        eridx = np.array([9, 7], dtype=np.intp)
+
+        assert isinstance(res, Int64Index)
+        tm.assert_index_equal(res, eres)
+        tm.assert_numpy_array_equal(lidx, elidx)
+        tm.assert_numpy_array_equal(ridx, eridx)
+
+        # Join two RangeIndex
+        other = RangeIndex(25, 14, -1)
+
+        res, lidx, ridx = index.join(other, how="inner", return_indexers=True)
+
+        assert isinstance(res, RangeIndex)
+        tm.assert_index_equal(res, eres)
+        tm.assert_numpy_array_equal(lidx, elidx)
+        tm.assert_numpy_array_equal(ridx, eridx)
+
+    def test_join_left(self):
+        # Join with Int64Index
+        index = RangeIndex(start=0, stop=20, step=2)
+        other = Int64Index(np.arange(25, 14, -1))
+
+        res, lidx, ridx = index.join(other, how="left", return_indexers=True)
+        eres = index
+        eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7], dtype=np.intp)
+
+        assert isinstance(res, RangeIndex)
+        tm.assert_index_equal(res, eres)
+        assert lidx is None
+        tm.assert_numpy_array_equal(ridx, eridx)
+
+        # Join with RangeIndex
+        other = Int64Index(np.arange(25, 14, -1))
+
+        res, lidx, ridx = index.join(other, how="left", return_indexers=True)
+
+        assert isinstance(res, RangeIndex)
+        tm.assert_index_equal(res, eres)
+        assert lidx is None
+        tm.assert_numpy_array_equal(ridx, eridx)
+
+    def test_join_right(self):
+        # Join with Int64Index
+        index = RangeIndex(start=0, stop=20, step=2)
+        other = Int64Index(np.arange(25, 14, -1))
+
+        res, lidx, ridx = index.join(other, how="right", return_indexers=True)
+        eres = other
+        elidx = np.array([-1, -1, -1, -1, -1, -1, -1, 9, -1, 8, -1], dtype=np.intp)
+
+        assert isinstance(other, Int64Index)
+        tm.assert_index_equal(res, eres)
+        tm.assert_numpy_array_equal(lidx, elidx)
+        assert ridx is None
+
+        # Join with RangeIndex
+        other = RangeIndex(25, 14, -1)
+
+        res, lidx, ridx = index.join(other, how="right", return_indexers=True)
+        eres = other
+
+        assert isinstance(other, RangeIndex)
+        tm.assert_index_equal(res, eres)
+        tm.assert_numpy_array_equal(lidx, elidx)
+        assert ridx is None
+
+    def test_join_non_int_index(self):
+        index = RangeIndex(start=0, stop=20, step=2)
+        other = Index([3, 6, 7, 8, 10], dtype=object)
+
+        outer = index.join(other, how="outer")
+        outer2 = other.join(index, how="outer")
+        expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18])
+        tm.assert_index_equal(outer, outer2)
+        tm.assert_index_equal(outer, expected)
+
+        inner = index.join(other, how="inner")
+        inner2 = other.join(index, how="inner")
+        expected = Index([6, 8, 10])
+        tm.assert_index_equal(inner, inner2)
+        tm.assert_index_equal(inner, expected)
+
+        left = index.join(other, how="left")
+        tm.assert_index_equal(left, index.astype(object))
+
+        left2 = other.join(index, how="left")
+        tm.assert_index_equal(left2, other)
+
+        right = index.join(other, how="right")
+        tm.assert_index_equal(right, other)
+
+        right2 = other.join(index, how="right")
+        tm.assert_index_equal(right2, index.astype(object))
+
+    def test_join_non_unique(self):
+        index = RangeIndex(start=0, stop=20, step=2)
+        other = Index([4, 4, 3, 3])
+
+        res, lidx, ridx = index.join(other, return_indexers=True)
+
+        eres = Int64Index([0, 2, 4, 4, 6, 8, 10, 
12, 14, 16, 18]) + elidx = np.array([0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.intp) + eridx = np.array([-1, -1, 0, 1, -1, -1, -1, -1, -1, -1, -1], dtype=np.intp) + + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_self(self, join_type): + index = RangeIndex(start=0, stop=20, step=2) + joined = index.join(index, how=join_type) + assert index is joined diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 24616f05c19ce..c1cc23039eeaf 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -294,174 +294,6 @@ def test_get_indexer_decreasing(self, stop): expected = np.array([-1, 2, -1, -1, 1, -1, -1, 0, -1], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) - def test_join_outer(self): - # join with Int64Index - index = self.create_index() - other = Int64Index(np.arange(25, 14, -1)) - - res, lidx, ridx = index.join(other, how="outer", return_indexers=True) - noidx_res = index.join(other, how="outer") - tm.assert_index_equal(res, noidx_res) - - eres = Int64Index( - [0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] - ) - elidx = np.array( - [0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9, -1, -1, -1, -1, -1, -1, -1], - dtype=np.intp, - ) - eridx = np.array( - [-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0], - dtype=np.intp, - ) - - assert isinstance(res, Int64Index) - assert not isinstance(res, RangeIndex) - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - # join with RangeIndex - other = RangeIndex(25, 14, -1) - - res, lidx, ridx = index.join(other, how="outer", return_indexers=True) - noidx_res = index.join(other, how="outer") - tm.assert_index_equal(res, noidx_res) - - assert isinstance(res, Int64Index) - assert not isinstance(res, RangeIndex) - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - def test_join_inner(self): - # Join with non-RangeIndex - index = self.create_index() - other = Int64Index(np.arange(25, 14, -1)) - - res, lidx, ridx = index.join(other, how="inner", return_indexers=True) - - # no guarantee of sortedness, so sort for comparison purposes - ind = res.argsort() - res = res.take(ind) - lidx = lidx.take(ind) - ridx = ridx.take(ind) - - eres = Int64Index([16, 18]) - elidx = np.array([8, 9], dtype=np.intp) - eridx = np.array([9, 7], dtype=np.intp) - - assert isinstance(res, Int64Index) - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - # Join two RangeIndex - other = RangeIndex(25, 14, -1) - - res, lidx, ridx = index.join(other, how="inner", return_indexers=True) - - assert isinstance(res, RangeIndex) - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - def test_join_left(self): - # Join with Int64Index - index = self.create_index() - other = Int64Index(np.arange(25, 14, -1)) - - res, lidx, ridx = index.join(other, how="left", return_indexers=True) - eres = index - eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7], dtype=np.intp) - - assert isinstance(res, RangeIndex) - tm.assert_index_equal(res, eres) - assert lidx is None - tm.assert_numpy_array_equal(ridx, eridx) - - # Join withRangeIndex - other = Int64Index(np.arange(25, 14, -1)) - - res, lidx, ridx 
= index.join(other, how="left", return_indexers=True) - - assert isinstance(res, RangeIndex) - tm.assert_index_equal(res, eres) - assert lidx is None - tm.assert_numpy_array_equal(ridx, eridx) - - def test_join_right(self): - # Join with Int64Index - index = self.create_index() - other = Int64Index(np.arange(25, 14, -1)) - - res, lidx, ridx = index.join(other, how="right", return_indexers=True) - eres = other - elidx = np.array([-1, -1, -1, -1, -1, -1, -1, 9, -1, 8, -1], dtype=np.intp) - - assert isinstance(other, Int64Index) - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - assert ridx is None - - # Join withRangeIndex - other = RangeIndex(25, 14, -1) - - res, lidx, ridx = index.join(other, how="right", return_indexers=True) - eres = other - - assert isinstance(other, RangeIndex) - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - assert ridx is None - - def test_join_non_int_index(self): - index = self.create_index() - other = Index([3, 6, 7, 8, 10], dtype=object) - - outer = index.join(other, how="outer") - outer2 = other.join(index, how="outer") - expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18]) - tm.assert_index_equal(outer, outer2) - tm.assert_index_equal(outer, expected) - - inner = index.join(other, how="inner") - inner2 = other.join(index, how="inner") - expected = Index([6, 8, 10]) - tm.assert_index_equal(inner, inner2) - tm.assert_index_equal(inner, expected) - - left = index.join(other, how="left") - tm.assert_index_equal(left, index.astype(object)) - - left2 = other.join(index, how="left") - tm.assert_index_equal(left2, other) - - right = index.join(other, how="right") - tm.assert_index_equal(right, other) - - right2 = other.join(index, how="right") - tm.assert_index_equal(right2, index.astype(object)) - - def test_join_non_unique(self): - index = self.create_index() - other = Index([4, 4, 3, 3]) - - res, lidx, ridx = index.join(other, return_indexers=True) - - eres = Int64Index([0, 2, 4, 4, 6, 8, 10, 12, 14, 16, 18]) - elidx = np.array([0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.intp) - eridx = np.array([-1, -1, 0, 1, -1, -1, -1, -1, -1, -1, -1], dtype=np.intp) - - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - def test_join_self(self, join_type): - index = self.create_index() - joined = index.join(index, how=join_type) - assert index is joined - def test_nbytes(self): # memory savings vs int index diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 3b4b6b09dcda5..77163e7a6a06a 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2056,7 +2056,9 @@ def test_slice_keep_name(self): assert index.name == index[1:].name @pytest.mark.parametrize( - "index", ["unicode", "string", "datetime", "int", "float"], indirect=True + "index", + ["unicode", "string", "datetime", "int", "uint", "float"], + indirect=True, ) def test_join_self(self, index, join_type): joined = index.join(index, how=join_type) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 1b504ce99604d..10d57d8616cf3 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -580,25 +580,6 @@ def test_identical(self): assert not index.copy(dtype=object).identical(index.copy(dtype=self._dtype)) - def test_join_non_unique(self): - left = Index([4, 4, 3, 3]) - - joined, lidx, ridx = left.join(left, return_indexers=True) - - exp_joined 
= Index([3, 3, 3, 3, 4, 4, 4, 4]) - tm.assert_index_equal(joined, exp_joined) - - exp_lidx = np.array([2, 2, 3, 3, 0, 0, 1, 1], dtype=np.intp) - tm.assert_numpy_array_equal(lidx, exp_lidx) - - exp_ridx = np.array([2, 3, 2, 3, 0, 1, 0, 1], dtype=np.intp) - tm.assert_numpy_array_equal(ridx, exp_ridx) - - def test_join_self(self, join_type): - index = self.create_index() - joined = index.join(index, how=join_type) - assert index is joined - def test_union_noncomparable(self): # corner case, non-Int64Index index = self.create_index() @@ -798,175 +779,6 @@ def test_intersection(self): ) tm.assert_index_equal(result, expected) - def test_join_inner(self): - index = self.create_index() - other = Int64Index([7, 12, 25, 1, 2, 5]) - other_mono = Int64Index([1, 2, 5, 7, 12, 25]) - - # not monotonic - res, lidx, ridx = index.join(other, how="inner", return_indexers=True) - - # no guarantee of sortedness, so sort for comparison purposes - ind = res.argsort() - res = res.take(ind) - lidx = lidx.take(ind) - ridx = ridx.take(ind) - - eres = Int64Index([2, 12]) - elidx = np.array([1, 6], dtype=np.intp) - eridx = np.array([4, 1], dtype=np.intp) - - assert isinstance(res, Int64Index) - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - # monotonic - res, lidx, ridx = index.join(other_mono, how="inner", return_indexers=True) - - res2 = index.intersection(other_mono) - tm.assert_index_equal(res, res2) - - elidx = np.array([1, 6], dtype=np.intp) - eridx = np.array([1, 4], dtype=np.intp) - assert isinstance(res, Int64Index) - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - def test_join_left(self): - index = self.create_index() - other = Int64Index([7, 12, 25, 1, 2, 5]) - other_mono = Int64Index([1, 2, 5, 7, 12, 25]) - - # not monotonic - res, lidx, ridx = index.join(other, how="left", return_indexers=True) - eres = index - eridx = np.array([-1, 4, -1, -1, -1, -1, 1, -1, -1, -1], dtype=np.intp) - - assert isinstance(res, Int64Index) - tm.assert_index_equal(res, eres) - assert lidx is None - tm.assert_numpy_array_equal(ridx, eridx) - - # monotonic - res, lidx, ridx = index.join(other_mono, how="left", return_indexers=True) - eridx = np.array([-1, 1, -1, -1, -1, -1, 4, -1, -1, -1], dtype=np.intp) - assert isinstance(res, Int64Index) - tm.assert_index_equal(res, eres) - assert lidx is None - tm.assert_numpy_array_equal(ridx, eridx) - - # non-unique - idx = Index([1, 1, 2, 5]) - idx2 = Index([1, 2, 5, 7, 9]) - res, lidx, ridx = idx2.join(idx, how="left", return_indexers=True) - eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 - eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) - elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - def test_join_right(self): - index = self.create_index() - other = Int64Index([7, 12, 25, 1, 2, 5]) - other_mono = Int64Index([1, 2, 5, 7, 12, 25]) - - # not monotonic - res, lidx, ridx = index.join(other, how="right", return_indexers=True) - eres = other - elidx = np.array([-1, 6, -1, -1, 1, -1], dtype=np.intp) - - assert isinstance(other, Int64Index) - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - assert ridx is None - - # monotonic - res, lidx, ridx = index.join(other_mono, how="right", return_indexers=True) - eres = other_mono - elidx = np.array([-1, 1, -1, -1, 6, 
-1], dtype=np.intp) - assert isinstance(other, Int64Index) - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - assert ridx is None - - # non-unique - idx = Index([1, 1, 2, 5]) - idx2 = Index([1, 2, 5, 7, 9]) - res, lidx, ridx = idx.join(idx2, how="right", return_indexers=True) - eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 - elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) - eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - def test_join_non_int_index(self): - index = self.create_index() - other = Index([3, 6, 7, 8, 10], dtype=object) - - outer = index.join(other, how="outer") - outer2 = other.join(index, how="outer") - expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18]) - tm.assert_index_equal(outer, outer2) - tm.assert_index_equal(outer, expected) - - inner = index.join(other, how="inner") - inner2 = other.join(index, how="inner") - expected = Index([6, 8, 10]) - tm.assert_index_equal(inner, inner2) - tm.assert_index_equal(inner, expected) - - left = index.join(other, how="left") - tm.assert_index_equal(left, index.astype(object)) - - left2 = other.join(index, how="left") - tm.assert_index_equal(left2, other) - - right = index.join(other, how="right") - tm.assert_index_equal(right, other) - - right2 = other.join(index, how="right") - tm.assert_index_equal(right2, index.astype(object)) - - def test_join_outer(self): - index = self.create_index() - other = Int64Index([7, 12, 25, 1, 2, 5]) - other_mono = Int64Index([1, 2, 5, 7, 12, 25]) - - # not monotonic - # guarantee of sortedness - res, lidx, ridx = index.join(other, how="outer", return_indexers=True) - noidx_res = index.join(other, how="outer") - tm.assert_index_equal(res, noidx_res) - - eres = Int64Index([0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 25]) - elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], dtype=np.intp) - eridx = np.array( - [-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], dtype=np.intp - ) - - assert isinstance(res, Int64Index) - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - # monotonic - res, lidx, ridx = index.join(other_mono, how="outer", return_indexers=True) - noidx_res = index.join(other_mono, how="outer") - tm.assert_index_equal(res, noidx_res) - - elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], dtype=np.intp) - eridx = np.array( - [-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], dtype=np.intp - ) - assert isinstance(res, Int64Index) - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - class TestUInt64Index(NumericInt): @@ -1043,196 +855,6 @@ def test_intersection(self, index_large): ) tm.assert_index_equal(result, expected) - def test_join_inner(self, index_large): - other = UInt64Index(2 ** 63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) - other_mono = UInt64Index( - 2 ** 63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") - ) - - # not monotonic - res, lidx, ridx = index_large.join(other, how="inner", return_indexers=True) - - # no guarantee of sortedness, so sort for comparison purposes - ind = res.argsort() - res = res.take(ind) - lidx = lidx.take(ind) - ridx = ridx.take(ind) - - eres = UInt64Index(2 ** 63 + np.array([10, 25], dtype="uint64")) - elidx = np.array([1, 4], dtype=np.intp) - eridx = np.array([5, 2], dtype=np.intp) - - 
assert isinstance(res, UInt64Index) - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - # monotonic - res, lidx, ridx = index_large.join( - other_mono, how="inner", return_indexers=True - ) - - res2 = index_large.intersection(other_mono) - tm.assert_index_equal(res, res2) - - elidx = np.array([1, 4], dtype=np.intp) - eridx = np.array([3, 5], dtype=np.intp) - - assert isinstance(res, UInt64Index) - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - def test_join_left(self, index_large): - other = UInt64Index(2 ** 63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) - other_mono = UInt64Index( - 2 ** 63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") - ) - - # not monotonic - res, lidx, ridx = index_large.join(other, how="left", return_indexers=True) - eres = index_large - eridx = np.array([-1, 5, -1, -1, 2], dtype=np.intp) - - assert isinstance(res, UInt64Index) - tm.assert_index_equal(res, eres) - assert lidx is None - tm.assert_numpy_array_equal(ridx, eridx) - - # monotonic - res, lidx, ridx = index_large.join(other_mono, how="left", return_indexers=True) - eridx = np.array([-1, 3, -1, -1, 5], dtype=np.intp) - - assert isinstance(res, UInt64Index) - tm.assert_index_equal(res, eres) - assert lidx is None - tm.assert_numpy_array_equal(ridx, eridx) - - # non-unique - idx = UInt64Index(2 ** 63 + np.array([1, 1, 2, 5], dtype="uint64")) - idx2 = UInt64Index(2 ** 63 + np.array([1, 2, 5, 7, 9], dtype="uint64")) - res, lidx, ridx = idx2.join(idx, how="left", return_indexers=True) - - # 1 is in idx2, so it should be x2 - eres = UInt64Index(2 ** 63 + np.array([1, 1, 2, 5, 7, 9], dtype="uint64")) - eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) - elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) - - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - def test_join_right(self, index_large): - other = UInt64Index(2 ** 63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) - other_mono = UInt64Index( - 2 ** 63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") - ) - - # not monotonic - res, lidx, ridx = index_large.join(other, how="right", return_indexers=True) - eres = other - elidx = np.array([-1, -1, 4, -1, -1, 1], dtype=np.intp) - - tm.assert_numpy_array_equal(lidx, elidx) - assert isinstance(other, UInt64Index) - tm.assert_index_equal(res, eres) - assert ridx is None - - # monotonic - res, lidx, ridx = index_large.join( - other_mono, how="right", return_indexers=True - ) - eres = other_mono - elidx = np.array([-1, -1, -1, 1, -1, 4], dtype=np.intp) - - assert isinstance(other, UInt64Index) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_index_equal(res, eres) - assert ridx is None - - # non-unique - idx = UInt64Index(2 ** 63 + np.array([1, 1, 2, 5], dtype="uint64")) - idx2 = UInt64Index(2 ** 63 + np.array([1, 2, 5, 7, 9], dtype="uint64")) - res, lidx, ridx = idx.join(idx2, how="right", return_indexers=True) - - # 1 is in idx2, so it should be x2 - eres = UInt64Index(2 ** 63 + np.array([1, 1, 2, 5, 7, 9], dtype="uint64")) - elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) - eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) - - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - def test_join_non_int_index(self, index_large): - other = Index( - 2 ** 63 + np.array([1, 5, 7, 10, 20], 
dtype="uint64"), dtype=object - ) - - outer = index_large.join(other, how="outer") - outer2 = other.join(index_large, how="outer") - expected = Index( - 2 ** 63 + np.array([0, 1, 5, 7, 10, 15, 20, 25], dtype="uint64") - ) - tm.assert_index_equal(outer, outer2) - tm.assert_index_equal(outer, expected) - - inner = index_large.join(other, how="inner") - inner2 = other.join(index_large, how="inner") - expected = Index(2 ** 63 + np.array([10, 20], dtype="uint64")) - tm.assert_index_equal(inner, inner2) - tm.assert_index_equal(inner, expected) - - left = index_large.join(other, how="left") - tm.assert_index_equal(left, index_large.astype(object)) - - left2 = other.join(index_large, how="left") - tm.assert_index_equal(left2, other) - - right = index_large.join(other, how="right") - tm.assert_index_equal(right, other) - - right2 = other.join(index_large, how="right") - tm.assert_index_equal(right2, index_large.astype(object)) - - def test_join_outer(self, index_large): - other = UInt64Index(2 ** 63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) - other_mono = UInt64Index( - 2 ** 63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") - ) - - # not monotonic - # guarantee of sortedness - res, lidx, ridx = index_large.join(other, how="outer", return_indexers=True) - noidx_res = index_large.join(other, how="outer") - tm.assert_index_equal(res, noidx_res) - - eres = UInt64Index( - 2 ** 63 + np.array([0, 1, 2, 7, 10, 12, 15, 20, 25], dtype="uint64") - ) - elidx = np.array([0, -1, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp) - eridx = np.array([-1, 3, 4, 0, 5, 1, -1, -1, 2], dtype=np.intp) - - assert isinstance(res, UInt64Index) - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - # monotonic - res, lidx, ridx = index_large.join( - other_mono, how="outer", return_indexers=True - ) - noidx_res = index_large.join(other_mono, how="outer") - tm.assert_index_equal(res, noidx_res) - - elidx = np.array([0, -1, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp) - eridx = np.array([-1, 0, 1, 2, 3, 4, -1, -1, 5], dtype=np.intp) - - assert isinstance(res, UInt64Index) - tm.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - @pytest.mark.parametrize("dtype", ["int64", "uint64"]) def test_int_float_union_dtype(dtype): diff --git a/pandas/tests/indexes/timedeltas/test_join.py b/pandas/tests/indexes/timedeltas/test_join.py new file mode 100644 index 0000000000000..3e73ed35dae96 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_join.py @@ -0,0 +1,37 @@ +import numpy as np + +from pandas import Index, Timedelta, timedelta_range +import pandas._testing as tm + + +class TestJoin: + def test_append_join_nondatetimeindex(self): + rng = timedelta_range("1 days", periods=10) + idx = Index(["a", "b", "c", "d"]) + + result = rng.append(idx) + assert isinstance(result[0], Timedelta) + + # it works + rng.join(idx, how="outer") + + def test_join_self(self, join_type): + index = timedelta_range("1 day", periods=10) + joined = index.join(index, how=join_type) + tm.assert_index_equal(index, joined) + + def test_does_not_convert_mixed_integer(self): + df = tm.makeCustomDataframe( + 10, + 10, + data_gen_f=lambda *args, **kwargs: np.random.randn(), + r_idx_type="i", + c_idx_type="td", + ) + str(df) + + cols = df.columns.join(df.index, how="outer") + joined = cols.join(df.columns) + assert cols.dtype == np.dtype("O") + assert cols.dtype == joined.dtype + tm.assert_index_equal(cols, joined) diff --git 
diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py
index 8a91c9d5e09c8..d4a94f8693081 100644
--- a/pandas/tests/indexes/timedeltas/test_timedelta.py
+++ b/pandas/tests/indexes/timedeltas/test_timedelta.py
@@ -91,27 +91,6 @@ def test_factorize(self):
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, idx3)

-    def test_join_self(self, join_type):
-        index = timedelta_range("1 day", periods=10)
-        joined = index.join(index, how=join_type)
-        tm.assert_index_equal(index, joined)
-
-    def test_does_not_convert_mixed_integer(self):
-        df = tm.makeCustomDataframe(
-            10,
-            10,
-            data_gen_f=lambda *args, **kwargs: randn(),
-            r_idx_type="i",
-            c_idx_type="td",
-        )
-        str(df)
-
-        cols = df.columns.join(df.index, how="outer")
-        joined = cols.join(df.columns)
-        assert cols.dtype == np.dtype("O")
-        assert cols.dtype == joined.dtype
-        tm.assert_index_equal(cols, joined)
-
     def test_sort_values(self):
         idx = TimedeltaIndex(["4d", "1d", "2d"])

@@ -181,16 +160,6 @@ def test_hash_error(self):
         ):
             hash(index)

-    def test_append_join_nondatetimeindex(self):
-        rng = timedelta_range("1 days", periods=10)
-        idx = Index(["a", "b", "c", "d"])
-
-        result = rng.append(idx)
-        assert isinstance(result[0], Timedelta)
-
-        # it works
-        rng.join(idx, how="outer")
-
     def test_append_numpy_bug_1681(self):
         td = timedelta_range("1 days", "10 days", freq="2D")


From 494ffd6819b59b1156b4b20504de2bf51c9c53ec Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 22 Feb 2020 07:43:05 -0800
Subject: [PATCH 097/388] REF: organize base class Index tests (#31864)

---
 .../tests/indexes/base_class/test_reshape.py  |  61 +++++++
 .../tests/indexes/base_class/test_setops.py   |  73 ++++++++
 pandas/tests/indexes/test_any_index.py        |   3 +-
 pandas/tests/indexes/test_base.py             | 162 ------------------
 pandas/tests/indexes/test_common.py           |  15 --
 pandas/tests/indexes/test_index_new.py        |  49 ++++++
 6 files changed, 185 insertions(+), 178 deletions(-)
 create mode 100644 pandas/tests/indexes/base_class/test_reshape.py
 create mode 100644 pandas/tests/indexes/test_index_new.py

diff --git a/pandas/tests/indexes/base_class/test_reshape.py b/pandas/tests/indexes/base_class/test_reshape.py
new file mode 100644
index 0000000000000..61826f2403a4b
--- /dev/null
+++ b/pandas/tests/indexes/base_class/test_reshape.py
@@ -0,0 +1,61 @@
+"""
+Tests for ndarray-like method on the base Index class
+"""
+import pytest
+
+import pandas as pd
+from pandas import Index
+import pandas._testing as tm
+
+
+class TestReshape:
+    def test_repeat(self):
+        repeats = 2
+        index = pd.Index([1, 2, 3])
+        expected = pd.Index([1, 1, 2, 2, 3, 3])
+
+        result = index.repeat(repeats)
+        tm.assert_index_equal(result, expected)
+
+    def test_insert(self):
+
+        # GH 7256
+        # validate neg/pos inserts
+        result = Index(["b", "c", "d"])
+
+        # test 0th element
+        tm.assert_index_equal(Index(["a", "b", "c", "d"]), result.insert(0, "a"))
+
+        # test Nth element that follows Python list behavior
+        tm.assert_index_equal(Index(["b", "c", "e", "d"]), result.insert(-1, "e"))
+
+        # test loc +/- neq (0, -1)
+        tm.assert_index_equal(result.insert(1, "z"), result.insert(-2, "z"))
+
+        # test empty
+        null_index = Index([])
+        tm.assert_index_equal(Index(["a"]), null_index.insert(0, "a"))
+
+    @pytest.mark.parametrize(
+        "pos,expected",
+        [
+            (0, Index(["b", "c", "d"], name="index")),
+            (-1, Index(["a", "b", "c"], name="index")),
+        ],
+    )
+    def test_delete(self, pos, expected):
+        index = Index(["a", "b", "c", "d"], name="index")
+        result = index.delete(pos)
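+        # both the remaining labels and the index name should survive the delete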
+        tm.assert_index_equal(result, expected)
+        assert result.name == expected.name
+
+    def test_append_multiple(self):
+        index = Index(["a", "b", "c", "d", "e", "f"])
+
+        foos = [index[:2], index[2:4], index[4:]]
+        result = foos[0].append(foos[1:])
+        tm.assert_index_equal(result, index)
+
+        # empty
+        result = index.append([])
+        tm.assert_index_equal(result, index)
diff --git a/pandas/tests/indexes/base_class/test_setops.py b/pandas/tests/indexes/base_class/test_setops.py
index e7d5e21d0ba47..ec3ef8050967c 100644
--- a/pandas/tests/indexes/base_class/test_setops.py
+++ b/pandas/tests/indexes/base_class/test_setops.py
@@ -1,12 +1,49 @@
 import numpy as np
 import pytest

+import pandas as pd
 from pandas import Index, Series
 import pandas._testing as tm
 from pandas.core.algorithms import safe_sort


 class TestIndexSetOps:
+    @pytest.mark.parametrize(
+        "method", ["union", "intersection", "difference", "symmetric_difference"]
+    )
+    def test_setops_disallow_true(self, method):
+        idx1 = pd.Index(["a", "b"])
+        idx2 = pd.Index(["b", "c"])
+
+        with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
+            getattr(idx1, method)(idx2, sort=True)
+
+    def test_setops_preserve_object_dtype(self):
+        idx = pd.Index([1, 2, 3], dtype=object)
+        result = idx.intersection(idx[1:])
+        expected = idx[1:]
+        tm.assert_index_equal(result, expected)
+
+        # if other is not monotonic increasing, intersection goes through
+        # a different route
+        result = idx.intersection(idx[1:][::-1])
+        tm.assert_index_equal(result, expected)
+
+        result = idx._union(idx[1:], sort=None)
+        expected = idx
+        tm.assert_index_equal(result, expected)
+
+        result = idx.union(idx[1:], sort=None)
+        tm.assert_index_equal(result, expected)
+
+        # if other is not monotonic increasing, _union goes through
+        # a different route
+        result = idx._union(idx[1:][::-1], sort=None)
+        tm.assert_index_equal(result, expected)
+
+        result = idx.union(idx[1:][::-1], sort=None)
+        tm.assert_index_equal(result, expected)
+
     def test_union_base(self):
         index = Index([0, "a", 1, "b", 2, "c"])
         first = index[3:]
@@ -28,6 +65,32 @@ def test_union_different_type_base(self, klass):

         assert tm.equalContents(result, index)

+    def test_union_sort_other_incomparable(self):
+        # https://github.com/pandas-dev/pandas/issues/24959
+        idx = pd.Index([1, pd.Timestamp("2000")])
+        # default (sort=None)
+        with tm.assert_produces_warning(RuntimeWarning):
+            result = idx.union(idx[:1])
+
+        tm.assert_index_equal(result, idx)
+
+        # sort=None
+        with tm.assert_produces_warning(RuntimeWarning):
+            result = idx.union(idx[:1], sort=None)
+        tm.assert_index_equal(result, idx)
+
+        # sort=False
+        result = idx.union(idx[:1], sort=False)
+        tm.assert_index_equal(result, idx)
+
+    @pytest.mark.xfail(reason="Not implemented")
+    def test_union_sort_other_incomparable_true(self):
+        # TODO decide on True behaviour
+        # sort=True
+        idx = pd.Index([1, pd.Timestamp("2000")])
+        with pytest.raises(TypeError, match=".*"):
+            idx.union(idx[:1], sort=True)
+
     @pytest.mark.parametrize("sort", [None, False])
     def test_intersection_base(self, sort):
         # (same results for py2 and py3 but sortedness not tested elsewhere)
@@ -50,6 +113,16 @@ def test_intersection_different_type_base(self, klass, sort):
         result = first.intersection(klass(second.values), sort=sort)
         assert tm.equalContents(result, second)

+    def test_intersect_nosort(self):
+        result = pd.Index(["c", "b", "a"]).intersection(["b", "a"])
+        expected = pd.Index(["b", "a"])
+        tm.assert_index_equal(result, expected)
+
+    def test_intersection_equal_sort(self):
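+        # self-intersection should come back unsorted for both sort=False and sort=None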
pd.Index(["c", "a", "b"]) + tm.assert_index_equal(idx.intersection(idx, sort=False), idx) + tm.assert_index_equal(idx.intersection(idx, sort=None), idx) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_base(self, sort): # (same results for py2 and py3 but sortedness not tested elsewhere) diff --git a/pandas/tests/indexes/test_any_index.py b/pandas/tests/indexes/test_any_index.py index 0db63f615c4f8..86881b8984228 100644 --- a/pandas/tests/indexes/test_any_index.py +++ b/pandas/tests/indexes/test_any_index.py @@ -7,7 +7,8 @@ def test_sort(indices): - with pytest.raises(TypeError): + msg = "cannot sort an Index object in-place, use sort_values instead" + with pytest.raises(TypeError, match=msg): indices.sort() diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 77163e7a6a06a..6327f1b03589b 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -14,7 +14,6 @@ from pandas.compat.numpy import np_datetime64_compat from pandas.util._test_decorators import async_mark -from pandas.core.dtypes.common import is_unsigned_integer_dtype from pandas.core.dtypes.generic import ABCIndex import pandas as pd @@ -107,15 +106,6 @@ def test_constructor_copy(self, index): # arr = np.array(5.) # pytest.raises(Exception, arr.view, Index) - @pytest.mark.parametrize("na_value", [None, np.nan]) - @pytest.mark.parametrize("vtype", [list, tuple, iter]) - def test_construction_list_tuples_nan(self, na_value, vtype): - # GH 18505 : valid tuples containing NaN - values = [(1, "two"), (3.0, na_value)] - result = Index(vtype(values)) - expected = MultiIndex.from_tuples(values) - tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("cast_as_obj", [True, False]) @pytest.mark.parametrize( "index", @@ -236,21 +226,6 @@ def __array__(self, dtype=None) -> np.ndarray: result = pd.Index(ArrayLike(array)) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize( - "dtype", - [int, "int64", "int32", "int16", "int8", "uint64", "uint32", "uint16", "uint8"], - ) - def test_constructor_int_dtype_float(self, dtype): - # GH 18400 - if is_unsigned_integer_dtype(dtype): - index_type = UInt64Index - else: - index_type = Int64Index - - expected = index_type([0, 1, 2, 3]) - result = Index([0.0, 1.0, 2.0, 3.0], dtype=dtype) - tm.assert_index_equal(result, expected) - def test_constructor_int_dtype_nan(self): # see gh-15187 data = [np.nan] @@ -374,19 +349,6 @@ def test_constructor_dtypes_to_float64(self, vals): index = Index(vals, dtype=float) assert isinstance(index, Float64Index) - @pytest.mark.parametrize("cast_index", [True, False]) - @pytest.mark.parametrize( - "vals", [[True, False, True], np.array([True, False, True], dtype=bool)] - ) - def test_constructor_dtypes_to_object(self, cast_index, vals): - if cast_index: - index = Index(vals, dtype=bool) - else: - index = Index(vals) - - assert isinstance(index, Index) - assert index.dtype == object - @pytest.mark.parametrize( "vals", [ @@ -591,25 +553,6 @@ def test_equals_object(self): def test_not_equals_object(self, comp): assert not Index(["a", "b", "c"]).equals(comp) - def test_insert(self): - - # GH 7256 - # validate neg/pos inserts - result = Index(["b", "c", "d"]) - - # test 0th element - tm.assert_index_equal(Index(["a", "b", "c", "d"]), result.insert(0, "a")) - - # test Nth element that follows Python list behavior - tm.assert_index_equal(Index(["b", "c", "e", "d"]), result.insert(-1, "e")) - - # test loc +/- neq (0, -1) - tm.assert_index_equal(result.insert(1, "z"), 
-
-        # test empty
-        null_index = Index([])
-        tm.assert_index_equal(Index(["a"]), null_index.insert(0, "a"))
-
     def test_insert_missing(self, nulls_fixture):
         # GH 22295
         # test there is no mangling of NA values
@@ -617,19 +560,6 @@ def test_insert_missing(self, nulls_fixture):
         result = Index(list("abc")).insert(1, nulls_fixture)
         tm.assert_index_equal(result, expected)

-    @pytest.mark.parametrize(
-        "pos,expected",
-        [
-            (0, Index(["b", "c", "d"], name="index")),
-            (-1, Index(["a", "b", "c"], name="index")),
-        ],
-    )
-    def test_delete(self, pos, expected):
-        index = Index(["a", "b", "c", "d"], name="index")
-        result = index.delete(pos)
-        tm.assert_index_equal(result, expected)
-        assert result.name == expected.name
-
     def test_delete_raises(self):
         index = Index(["a", "b", "c", "d"], name="index")
         msg = "index 5 is out of bounds for axis 0 with size 4"
@@ -843,16 +773,6 @@ def test_intersect_str_dates(self, sort):

         assert len(result) == 0

-    def test_intersect_nosort(self):
-        result = pd.Index(["c", "b", "a"]).intersection(["b", "a"])
-        expected = pd.Index(["b", "a"])
-        tm.assert_index_equal(result, expected)
-
-    def test_intersection_equal_sort(self):
-        idx = pd.Index(["c", "a", "b"])
-        tm.assert_index_equal(idx.intersection(idx, sort=False), idx)
-        tm.assert_index_equal(idx.intersection(idx, sort=None), idx)
-
     @pytest.mark.xfail(reason="Not implemented")
     def test_intersection_equal_sort_true(self):
         # TODO decide on True behaviour
@@ -914,32 +834,6 @@ def test_union_sort_special_true(self, slice_):
         expected = pd.Index([0, 1, 2])
         tm.assert_index_equal(result, expected)

-    def test_union_sort_other_incomparable(self):
-        # https://github.com/pandas-dev/pandas/issues/24959
-        idx = pd.Index([1, pd.Timestamp("2000")])
-        # default (sort=None)
-        with tm.assert_produces_warning(RuntimeWarning):
-            result = idx.union(idx[:1])
-
-        tm.assert_index_equal(result, idx)
-
-        # sort=None
-        with tm.assert_produces_warning(RuntimeWarning):
-            result = idx.union(idx[:1], sort=None)
-        tm.assert_index_equal(result, idx)
-
-        # sort=False
-        result = idx.union(idx[:1], sort=False)
-        tm.assert_index_equal(result, idx)
-
-    @pytest.mark.xfail(reason="Not implemented")
-    def test_union_sort_other_incomparable_true(self):
-        # TODO decide on True behaviour
-        # sort=True
-        idx = pd.Index([1, pd.Timestamp("2000")])
-        with pytest.raises(TypeError, match=".*"):
-            idx.union(idx[:1], sort=True)
-
     @pytest.mark.parametrize("klass", [np.array, Series, list])
     @pytest.mark.parametrize("sort", [None, False])
     def test_union_from_iterables(self, index, klass, sort):
@@ -1012,42 +906,6 @@ def test_union_dt_as_obj(self, sort):
         tm.assert_contains_all(index, second_cat)
         tm.assert_contains_all(date_index, first_cat)

-    @pytest.mark.parametrize(
-        "method", ["union", "intersection", "difference", "symmetric_difference"]
-    )
-    def test_setops_disallow_true(self, method):
-        idx1 = pd.Index(["a", "b"])
-        idx2 = pd.Index(["b", "c"])
-
-        with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
-            getattr(idx1, method)(idx2, sort=True)
-
-    def test_setops_preserve_object_dtype(self):
-        idx = pd.Index([1, 2, 3], dtype=object)
-        result = idx.intersection(idx[1:])
-        expected = idx[1:]
-        tm.assert_index_equal(result, expected)
-
-        # if other is not monotonic increasing, intersection goes through
-        # a different route
-        result = idx.intersection(idx[1:][::-1])
-        tm.assert_index_equal(result, expected)
-
-        result = idx._union(idx[1:], sort=None)
-        expected = idx
-        tm.assert_index_equal(result, expected)
-
-        result = idx.union(idx[1:], sort=None)
-        tm.assert_index_equal(result, expected)
-
-        # if other is not monotonic increasing, _union goes through
-        # a different route
-        result = idx._union(idx[1:][::-1], sort=None)
-        tm.assert_index_equal(result, expected)
-
-        result = idx.union(idx[1:][::-1], sort=None)
-        tm.assert_index_equal(result, expected)
-
     def test_map_identity_mapping(self, indices):
         # GH 12766
         tm.assert_index_equal(indices, indices.map(lambda x: x))
@@ -1155,17 +1013,6 @@ def test_map_defaultdict(self):
         expected = Index(["stuff", "blank", "blank"])
         tm.assert_index_equal(result, expected)

-    def test_append_multiple(self):
-        index = Index(["a", "b", "c", "d", "e", "f"])
-
-        foos = [index[:2], index[2:4], index[4:]]
-        result = foos[0].append(foos[1:])
-        tm.assert_index_equal(result, index)
-
-        # empty
-        result = index.append([])
-        tm.assert_index_equal(result, index)
-
     @pytest.mark.parametrize("name,expected", [("foo", "foo"), ("bar", None)])
     def test_append_empty_preserve_name(self, name, expected):
         left = Index([], name="foo")
@@ -2446,7 +2293,6 @@ class TestMixedIntIndex(Base):
     # Mostly the tests from common.py for which the results differ
     # in py2 and py3 because ints and strings are uncomparable in py3
     # (GH 13514)
-
     _holder = Index

     @pytest.fixture(params=[[0, "a", 1, "b", 2, "c"]], ids=["mixedIndex"])
@@ -2582,14 +2428,6 @@ def test_get_combined_index(self):
         expected = Index([])
         tm.assert_index_equal(result, expected)

-    def test_repeat(self):
-        repeats = 2
-        index = pd.Index([1, 2, 3])
-        expected = pd.Index([1, 1, 2, 2, 3, 3])
-
-        result = index.repeat(repeats)
-        tm.assert_index_equal(result, expected)
-
     @pytest.mark.parametrize(
         "index",
         [
diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py
index 7e30233353553..b46e6514b4536 100644
--- a/pandas/tests/indexes/test_common.py
+++ b/pandas/tests/indexes/test_common.py
@@ -158,13 +158,6 @@ def test_set_name_methods(self, indices):
         assert indices.name == name
         assert indices.names == [name]

-    def test_hash_error(self, indices):
-        index = indices
-        with pytest.raises(
-            TypeError, match=f"unhashable type: '{type(index).__name__}'"
-        ):
-            hash(indices)
-
     def test_copy_and_deepcopy(self, indices):
         from copy import copy, deepcopy

@@ -246,11 +239,6 @@ def test_get_unique_index(self, indices):
             result = i._get_unique_index(dropna=dropna)
             tm.assert_index_equal(result, expected)

-    def test_sort(self, indices):
-        msg = "cannot sort an Index object in-place, use sort_values instead"
-        with pytest.raises(TypeError, match=msg):
-            indices.sort()
-
     def test_mutability(self, indices):
         if not len(indices):
             pytest.skip("Skip check for empty Index")
@@ -261,9 +249,6 @@ def test_mutability(self, indices):
     def test_view(self, indices):
         assert indices.view().name == indices.name

-    def test_compat(self, indices):
-        assert indices.tolist() == list(indices)
-
     def test_searchsorted_monotonic(self, indices):
         # GH17271
         # not implemented for tuple searches in MultiIndex
diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py
new file mode 100644
index 0000000000000..e150df971da2d
--- /dev/null
+++ b/pandas/tests/indexes/test_index_new.py
@@ -0,0 +1,49 @@
+"""
+Tests for the Index constructor conducting inference.
+""" +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_unsigned_integer_dtype + +from pandas import Index, Int64Index, MultiIndex, UInt64Index +import pandas._testing as tm + + +class TestIndexConstructorInference: + @pytest.mark.parametrize("na_value", [None, np.nan]) + @pytest.mark.parametrize("vtype", [list, tuple, iter]) + def test_construction_list_tuples_nan(self, na_value, vtype): + # GH#18505 : valid tuples containing NaN + values = [(1, "two"), (3.0, na_value)] + result = Index(vtype(values)) + expected = MultiIndex.from_tuples(values) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", + [int, "int64", "int32", "int16", "int8", "uint64", "uint32", "uint16", "uint8"], + ) + def test_constructor_int_dtype_float(self, dtype): + # GH#18400 + if is_unsigned_integer_dtype(dtype): + index_type = UInt64Index + else: + index_type = Int64Index + + expected = index_type([0, 1, 2, 3]) + result = Index([0.0, 1.0, 2.0, 3.0], dtype=dtype) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("cast_index", [True, False]) + @pytest.mark.parametrize( + "vals", [[True, False, True], np.array([True, False, True], dtype=bool)] + ) + def test_constructor_dtypes_to_object(self, cast_index, vals): + if cast_index: + index = Index(vals, dtype=bool) + else: + index = Index(vals) + + assert type(index) is Index + assert index.dtype == object From 9e69040f199266cd7a743dfd2f579f271337687f Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Sat, 22 Feb 2020 17:45:31 +0200 Subject: [PATCH 098/388] CLN: some code cleanups in pandas/_libs/ (#31808) * CLN: some code cleanups in pandas/_libs/ * Reverted "bint" REF: https://github.com/pandas-dev/pandas/pull/31808/files#r376721172 * Added trailing comma to imports REF: https://github.com/pandas-dev/pandas/pull/31808/files#r378523656 * Reverted bad code * Lint issues * Reverted wrong code REF: https://github.com/pandas-dev/pandas/pull/31808#discussion_r379806862 * Removed parens REF: https://github.com/pandas-dev/pandas/pull/31808#discussion_r381626183 * "in fmt" in prev line REF: https://github.com/pandas-dev/pandas/pull/31808#discussion_r381626633 --- pandas/_libs/algos.pyx | 31 ++++++++++++---- pandas/_libs/join.pyx | 20 ++++++++--- pandas/_libs/lib.pyx | 43 ++++++++++++++++------ pandas/_libs/reshape.pyx | 16 +++++++-- pandas/_libs/sparse.pyx | 4 +-- pandas/_libs/tslibs/period.pyx | 60 +++++++++++++++++-------------- pandas/_libs/tslibs/strptime.pyx | 14 +++----- pandas/_libs/tslibs/timezones.pyx | 4 +-- 8 files changed, 129 insertions(+), 63 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 5f3d946a1e024..b7f17aee35a44 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -7,13 +7,30 @@ from libc.math cimport fabs, sqrt import numpy as np cimport numpy as cnp -from numpy cimport (ndarray, - NPY_INT64, NPY_INT32, NPY_INT16, NPY_INT8, - NPY_UINT64, NPY_UINT32, NPY_UINT16, NPY_UINT8, - NPY_FLOAT32, NPY_FLOAT64, - NPY_OBJECT, - int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, - uint32_t, uint64_t, float32_t, float64_t) +from numpy cimport ( + NPY_FLOAT32, + NPY_FLOAT64, + NPY_INT8, + NPY_INT16, + NPY_INT32, + NPY_INT64, + NPY_OBJECT, + NPY_UINT8, + NPY_UINT16, + NPY_UINT32, + NPY_UINT64, + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + ndarray, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) cnp.import_array() diff --git 
diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx
index 093c53790cd35..dfa7aa708d681 100644
--- a/pandas/_libs/join.pyx
+++ b/pandas/_libs/join.pyx
@@ -3,13 +3,25 @@ from cython import Py_ssize_t

 import numpy as np
 cimport numpy as cnp
-from numpy cimport (ndarray,
-                    int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
-                    uint32_t, uint64_t, float32_t, float64_t)
+from numpy cimport (
+    float32_t,
+    float64_t,
+    int8_t,
+    int16_t,
+    int32_t,
+    int64_t,
+    ndarray,
+    uint8_t,
+    uint16_t,
+    uint32_t,
+    uint64_t,
+)
 cnp.import_array()

 from pandas._libs.algos import (
-    groupsort_indexer, ensure_platform_int, take_1d_int64_int64
+    ensure_platform_int,
+    groupsort_indexer,
+    take_1d_int64_int64,
 )
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 1990ef66a6bf1..7a18429f21a18 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -15,18 +15,33 @@ from cpython.iterator cimport PyIter_Check
 from cpython.sequence cimport PySequence_Check
 from cpython.number cimport PyNumber_Check

-from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
-                               PyTime_Check, PyDelta_Check,
-                               PyDateTime_IMPORT)
+from cpython.datetime cimport (
+    PyDateTime_Check,
+    PyDate_Check,
+    PyTime_Check,
+    PyDelta_Check,
+    PyDateTime_IMPORT,
+)
 PyDateTime_IMPORT

 import numpy as np
 cimport numpy as cnp
-from numpy cimport (ndarray, PyArray_Check, PyArray_GETITEM,
-                    PyArray_ITER_DATA, PyArray_ITER_NEXT, PyArray_IterNew,
-                    flatiter, NPY_OBJECT,
-                    int64_t, float32_t, float64_t,
-                    uint8_t, uint64_t, complex128_t)
+from numpy cimport (
+    NPY_OBJECT,
+    PyArray_Check,
+    PyArray_GETITEM,
+    PyArray_ITER_DATA,
+    PyArray_ITER_NEXT,
+    PyArray_IterNew,
+    complex128_t,
+    flatiter,
+    float32_t,
+    float64_t,
+    int64_t,
+    ndarray,
+    uint8_t,
+    uint64_t,
+)
 cnp.import_array()

 cdef extern from "numpy/arrayobject.h":
@@ -60,7 +75,12 @@ from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64
 from pandas._libs.tslibs.timezones cimport get_timezone, tz_compare

 from pandas._libs.missing cimport (
-    checknull, isnaobj, is_null_datetime64, is_null_timedelta64, is_null_period, C_NA
+    checknull,
+    isnaobj,
+    is_null_datetime64,
+    is_null_timedelta64,
+    is_null_period,
+    C_NA,
 )


@@ -246,7 +266,7 @@ def item_from_zerodim(val: object) -> object:

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def fast_unique_multiple(list arrays, sort: bool=True):
+def fast_unique_multiple(list arrays, sort: bool = True):
     """
     Generate a list of unique values from a list of arrays.
@@ -277,6 +297,7 @@ def fast_unique_multiple(list arrays, sort: bool = True):
             if val not in table:
                 table[val] = stub
                 uniques.append(val)
+
     if sort is None:
         try:
             uniques.sort()
@@ -289,7 +310,7 @@ def fast_unique_multiple(list arrays, sort: bool = True):

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def fast_unique_multiple_list(lists: list, sort: bool=True) -> list:
+def fast_unique_multiple_list(lists: list, sort: bool = True) -> list:
     cdef:
         list buf
         Py_ssize_t k = len(lists)
diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx
index 4e831081c8e54..e74b5919a4590 100644
--- a/pandas/_libs/reshape.pyx
+++ b/pandas/_libs/reshape.pyx
@@ -1,8 +1,20 @@
 import cython
 from cython import Py_ssize_t

-from numpy cimport (int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
-                    uint32_t, uint64_t, float32_t, float64_t, ndarray)
+from numpy cimport (
+    float32_t,
+    float64_t,
+    int8_t,
+    int16_t,
+    int32_t,
+    int64_t,
+    ndarray,
+    uint8_t,
+    uint16_t,
+    uint32_t,
+    uint64_t,
+)
+
 cimport numpy as cnp
 import numpy as np
 from pandas._libs.lib cimport c_is_list_like
diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx
index 3a6dd506b2428..4ca053a0ee83a 100644
--- a/pandas/_libs/sparse.pyx
+++ b/pandas/_libs/sparse.pyx
@@ -448,7 +448,7 @@ cdef class BlockIndex(SparseIndex):
         ylen = y.blengths

         # block may be split, but can't exceed original len / 2 + 1
-        max_len = int(min(self.length, y.length) / 2) + 1
+        max_len = min(self.length, y.length) // 2 + 1
         out_bloc = np.empty(max_len, dtype=np.int32)
         out_blen = np.empty(max_len, dtype=np.int32)

@@ -672,7 +672,7 @@ cdef class BlockUnion(BlockMerge):
         ystart = self.ystart
         yend = self.yend

-        max_len = int(min(self.x.length, self.y.length) / 2) + 1
+        max_len = min(self.x.length, self.y.length) // 2 + 1
         out_bloc = np.empty(max_len, dtype=np.int32)
         out_blen = np.empty(max_len, dtype=np.int32)

diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx
index 9419f0eba39aa..c3a47902cff0f 100644
--- a/pandas/_libs/tslibs/period.pyx
+++ b/pandas/_libs/tslibs/period.pyx
@@ -1,8 +1,6 @@
 from datetime import datetime

-from cpython.object cimport (
-    PyObject_RichCompareBool,
-    Py_EQ, Py_NE)
+from cpython.object cimport PyObject_RichCompareBool, Py_EQ, Py_NE

 from numpy cimport int64_t, import_array, ndarray
 import numpy as np
@@ -14,15 +12,25 @@ from libc.string cimport strlen, memset

 import cython

-from cpython.datetime cimport (PyDateTime_Check, PyDelta_Check, PyDate_Check,
-                               PyDateTime_IMPORT)
+from cpython.datetime cimport (
+    PyDate_Check,
+    PyDateTime_Check,
+    PyDateTime_IMPORT,
+    PyDelta_Check,
+)
 # import datetime C API
 PyDateTime_IMPORT

 from pandas._libs.tslibs.np_datetime cimport (
-    npy_datetimestruct, dtstruct_to_dt64, dt64_to_dtstruct,
-    pandas_datetime_to_datetimestruct, check_dts_bounds,
-    NPY_DATETIMEUNIT, NPY_FR_D, NPY_FR_us)
+    npy_datetimestruct,
+    dtstruct_to_dt64,
+    dt64_to_dtstruct,
+    pandas_datetime_to_datetimestruct,
+    check_dts_bounds,
+    NPY_DATETIMEUNIT,
+    NPY_FR_D,
+    NPY_FR_us,
+)

 cdef extern from "src/datetime/np_datetime.h":
     int64_t npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr,
@@ -37,12 +45,15 @@ from pandas._libs.tslibs.timedeltas import Timedelta
 from pandas._libs.tslibs.timedeltas cimport delta_to_nanoseconds

 cimport pandas._libs.tslibs.ccalendar as ccalendar
-from pandas._libs.tslibs.ccalendar cimport (
-    dayofweek, get_day_of_year, is_leapyear)
+from pandas._libs.tslibs.ccalendar cimport dayofweek, get_day_of_year, is_leapyear
 from pandas._libs.tslibs.ccalendar import MONTH_NUMBERS
 from pandas._libs.tslibs.frequencies cimport (
-    get_freq_code, get_base_alias, get_to_timestamp_base, get_freq_str,
-    get_rule_month)
+    get_base_alias,
+    get_freq_code,
+    get_freq_str,
+    get_rule_month,
+    get_to_timestamp_base,
+)
 from pandas._libs.tslibs.parsing import parse_time_string
 from pandas._libs.tslibs.resolution import Resolution
 from pandas._libs.tslibs.nattype import nat_strings
@@ -55,7 +66,7 @@ from pandas._libs.tslibs.tzconversion cimport tz_convert_utc_to_tzlocal

 cdef:
     enum:
-        INT32_MIN = -2147483648
+        INT32_MIN = -2_147_483_648


 ctypedef struct asfreq_info:
@@ -179,8 +190,7 @@ cdef freq_conv_func get_asfreq_func(int from_freq, int to_freq) nogil:
             return asfreq_MtoB
         elif from_group == FR_WK:
             return asfreq_WtoB
-        elif from_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC,
-                            FR_MS, FR_US, FR_NS]:
+        elif from_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]:
             return asfreq_DTtoB
         else:
             return nofunc
@@ -289,17 +299,15 @@ cdef int64_t DtoB(npy_datetimestruct *dts, int roll_back,
     return DtoB_weekday(unix_date)


-cdef inline int64_t upsample_daytime(int64_t ordinal,
-                                     asfreq_info *af_info) nogil:
-    if (af_info.is_end):
+cdef inline int64_t upsample_daytime(int64_t ordinal, asfreq_info *af_info) nogil:
+    if af_info.is_end:
        return (ordinal + 1) * af_info.intraday_conversion_factor - 1
    else:
        return ordinal * af_info.intraday_conversion_factor


-cdef inline int64_t downsample_daytime(int64_t ordinal,
-                                       asfreq_info *af_info) nogil:
-    return ordinal // (af_info.intraday_conversion_factor)
+cdef inline int64_t downsample_daytime(int64_t ordinal, asfreq_info *af_info) nogil:
+    return ordinal // af_info.intraday_conversion_factor


 cdef inline int64_t transform_via_day(int64_t ordinal,
@@ -1464,24 +1472,24 @@ def extract_freq(ndarray[object] values):

     cdef:
         Py_ssize_t i, n = len(values)
-        object p
+        object value

     for i in range(n):
-        p = values[i]
+        value = values[i]

         try:
             # now Timestamp / NaT has freq attr
-            if is_period_object(p):
-                return p.freq
+            if is_period_object(value):
+                return value.freq
         except AttributeError:
             pass

     raise ValueError('freq not specified and cannot be inferred')

-
 # -----------------------------------------------------------------------
 # period helpers

+
 @cython.wraparound(False)
 @cython.boundscheck(False)
 cdef int64_t[:] localize_dt64arr_to_period(const int64_t[:] stamps,
diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx
index 5508b208de00a..dfe050c7bbff7 100644
--- a/pandas/_libs/tslibs/strptime.pyx
+++ b/pandas/_libs/tslibs/strptime.pyx
@@ -45,8 +45,7 @@ cdef dict _parse_code_table = {'y': 0,
                                'u': 22}


-def array_strptime(object[:] values, object fmt,
-                   bint exact=True, errors='raise'):
+def array_strptime(object[:] values, object fmt, bint exact=True, errors='raise'):
     """
     Calculates the datetime structs represented by the passed array of strings

@@ -78,12 +77,9 @@ def array_strptime(object[:] values, object fmt,
     if fmt is not None:
         if '%W' in fmt or '%U' in fmt:
             if '%Y' not in fmt and '%y' not in fmt:
-                raise ValueError("Cannot use '%W' or '%U' without "
-                                 "day and year")
-            if ('%A' not in fmt and '%a' not in fmt and '%w' not
-                    in fmt):
-                raise ValueError("Cannot use '%W' or '%U' without "
-                                 "day and year")
+                raise ValueError("Cannot use '%W' or '%U' without day and year")
+            if '%A' not in fmt and '%a' not in fmt and '%w' not in fmt:
+                raise ValueError("Cannot use '%W' or '%U' without day and year")
         elif '%Z' in fmt and '%z' in fmt:
             raise ValueError("Cannot parse both %Z and %z")

@@ -749,6 +745,6 @@ cdef parse_timezone_directive(str z):
         microseconds = int(gmtoff_remainder + gmtoff_remainder_padding)

     total_minutes = ((hours * 60) + minutes + (seconds // 60) +
-                     (microseconds // 60000000))
+                     (microseconds // 60_000_000))
     total_minutes = -total_minutes if z.startswith("-") else total_minutes
     return pytz.FixedOffset(total_minutes)
diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx
index 35ee87e714fa8..07947f6677c04 100644
--- a/pandas/_libs/tslibs/timezones.pyx
+++ b/pandas/_libs/tslibs/timezones.pyx
@@ -196,7 +196,7 @@ cdef int64_t[:] unbox_utcoffsets(object transinfo):
     arr = np.empty(sz, dtype='i8')

     for i in range(sz):
-        arr[i] = int(transinfo[i][0].total_seconds()) * 1000000000
+        arr[i] = int(transinfo[i][0].total_seconds()) * 1_000_000_000

     return arr

@@ -217,7 +217,7 @@ cdef object get_dst_info(object tz):
     if cache_key is None:
         # e.g. pytz.FixedOffset, matplotlib.dates._UTC,
         # psycopg2.tz.FixedOffsetTimezone
-        num = int(get_utcoffset(tz, None).total_seconds()) * 1000000000
+        num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000
         return (np.array([NPY_NAT + 1], dtype=np.int64),
                 np.array([num], dtype=np.int64),
                 None)

From dafec63f2e138d0451dae5b37edea2e83f9adc8a Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 22 Feb 2020 07:56:03 -0800
Subject: [PATCH 099/388] BUG: DataFrame.iat incorrectly wrapping datetime
 objects (#32089)

---
 doc/source/whatsnew/v1.1.0.rst       |  1 +
 pandas/core/frame.py                 |  4 ++--
 pandas/tests/indexing/test_scalar.py | 22 ++++++++++++++++++++++
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 0e2b15974dbbd..34a67836f9675 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -159,6 +159,7 @@ Indexing
 - Bug in :meth:`PeriodIndex.is_monotonic` incorrectly returning ``True`` when containing leading ``NaT`` entries (:issue:`31437`)
 - Bug in :meth:`DatetimeIndex.get_loc` raising ``KeyError`` with converted-integer key instead of the user-passed key (:issue:`31425`)
 - Bug in :meth:`Series.xs` incorrectly returning ``Timestamp`` instead of ``datetime64`` in some object-dtype cases (:issue:`31630`)
+- Bug in :meth:`DataFrame.iat` incorrectly returning ``Timestamp`` instead of ``datetime`` in some object-dtype cases (:issue:`32089`)

 Missing
 ^^^^^^^
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 9fe1ec7b792c8..b3da22d10eddb 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2622,8 +2622,8 @@ def _get_value(self, index, col, takeable: bool = False):
         scalar
         """
         if takeable:
-            series = self._iget_item_cache(col)
-            return com.maybe_box_datetimelike(series._values[index])
+            series = self._ixs(col, axis=1)
+            return series._values[index]

         series = self._get_item_cache(col)
         engine = self.index._engine
diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py
index 899c58eb5edea..c4750778e2eb8 100644
--- a/pandas/tests/indexing/test_scalar.py
+++ b/pandas/tests/indexing/test_scalar.py
@@ -1,4 +1,5 @@
 """ test scalar indexing, including at and iat """
+from datetime import datetime, timedelta

 import numpy as np
 import pytest
@@ -288,3 +289,24 @@ def test_getitem_zerodim_np_array(self):
         s = Series([1, 2])
         result = s[np.array(0)]
         assert result == 1
+
+
+def test_iat_dont_wrap_object_datetimelike():
+    # GH#32089 .iat calls go through DataFrame._get_value, should not
+    #  call maybe_box_datetimelike
+    dti = date_range("2016-01-01", periods=3)
+    tdi = dti - dti
+    ser = Series(dti.to_pydatetime(), dtype=object)
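+    # object-dtype columns: ser holds python datetimes, ser2 holds python timedeltas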
+    ser2 = Series(tdi.to_pytimedelta(), dtype=object)
+    df = DataFrame({"A": ser, "B": ser2})
+    assert (df.dtypes == object).all()
+
+    for result in [df.at[0, "A"], df.iat[0, 0], df.loc[0, "A"], df.iloc[0, 0]]:
+        assert result is ser[0]
+        assert isinstance(result, datetime)
+        assert not isinstance(result, Timestamp)
+
+    for result in [df.at[1, "B"], df.iat[1, 1], df.loc[1, "B"], df.iloc[1, 1]]:
+        assert result is ser2[1]
+        assert isinstance(result, timedelta)
+        assert not isinstance(result, Timedelta)

From 8de40965b2fb9429e5c33741fe710f2630f77328 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 22 Feb 2020 08:01:53 -0800
Subject: [PATCH 100/388] REF/TST: implement test_interpolate for Series
 (#32112)

---
 .../tests/series/methods/test_interpolate.py  | 673 ++++++++++++++++++
 pandas/tests/series/test_internals.py         |   1 +
 pandas/tests/series/test_missing.py           | 670 -----------------
 3 files changed, 674 insertions(+), 670 deletions(-)
 create mode 100644 pandas/tests/series/methods/test_interpolate.py

diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py
new file mode 100644
index 0000000000000..6844225a81a8f
--- /dev/null
+++ b/pandas/tests/series/methods/test_interpolate.py
@@ -0,0 +1,673 @@
+import numpy as np
+import pytest
+
+import pandas.util._test_decorators as td
+
+import pandas as pd
+from pandas import Index, MultiIndex, Series, date_range, isna
+import pandas._testing as tm
+
+
+@pytest.fixture(
+    params=[
+        "linear",
+        "index",
+        "values",
+        "nearest",
+        "slinear",
+        "zero",
+        "quadratic",
+        "cubic",
+        "barycentric",
+        "krogh",
+        "polynomial",
+        "spline",
+        "piecewise_polynomial",
+        "from_derivatives",
+        "pchip",
+        "akima",
+    ]
+)
+def nontemporal_method(request):
+    """ Fixture that returns an (method name, required kwargs) pair.
+
+    This fixture does not include method 'time' as a parameterization; that
+    method requires a Series with a DatetimeIndex, and is generally tested
+    separately from these non-temporal methods.
+    """
+    method = request.param
+    kwargs = dict(order=1) if method in ("spline", "polynomial") else dict()
+    return method, kwargs
+
+
+@pytest.fixture(
+    params=[
+        "linear",
+        "slinear",
+        "zero",
+        "quadratic",
+        "cubic",
+        "barycentric",
+        "krogh",
+        "polynomial",
+        "spline",
+        "piecewise_polynomial",
+        "from_derivatives",
+        "pchip",
+        "akima",
+    ]
+)
+def interp_methods_ind(request):
+    """ Fixture that returns a (method name, required kwargs) pair to
+    be tested for various Index types.
+
+    This fixture does not include methods - 'time', 'index', 'nearest',
+    'values' as a parameterization
+    """
+    method = request.param
+    kwargs = dict(order=1) if method in ("spline", "polynomial") else dict()
+    return method, kwargs
+
+
+class TestSeriesInterpolateData:
+    def test_interpolate(self, datetime_series, string_series):
+        ts = Series(np.arange(len(datetime_series), dtype=float), datetime_series.index)
+
+        ts_copy = ts.copy()
+        ts_copy[5:10] = np.NaN
+
+        linear_interp = ts_copy.interpolate(method="linear")
+        tm.assert_series_equal(linear_interp, ts)
+
+        ord_ts = Series(
+            [d.toordinal() for d in datetime_series.index], index=datetime_series.index
+        ).astype(float)
+
+        ord_ts_copy = ord_ts.copy()
+        ord_ts_copy[5:10] = np.NaN
+
+        time_interp = ord_ts_copy.interpolate(method="time")
+        tm.assert_series_equal(time_interp, ord_ts)
+
+    def test_interpolate_time_raises_for_non_timeseries(self):
+        # When method='time' is used on a non-TimeSeries that contains a null
+        # value, a ValueError should be raised.
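+        # (the default RangeIndex gives time-weighted interpolation nothing to work with)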
+        non_ts = Series([0, 1, 2, np.NaN])
+        msg = "time-weighted interpolation only works on Series.* with a DatetimeIndex"
+        with pytest.raises(ValueError, match=msg):
+            non_ts.interpolate(method="time")
+
+    @td.skip_if_no_scipy
+    def test_interpolate_pchip(self):
+
+        ser = Series(np.sort(np.random.uniform(size=100)))
+
+        # interpolate at new_index
+        new_index = ser.index.union(
+            Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75])
+        ).astype(float)
+        interp_s = ser.reindex(new_index).interpolate(method="pchip")
+        # does not blow up, GH5977
+        interp_s[49:51]
+
+    @td.skip_if_no_scipy
+    def test_interpolate_akima(self):
+
+        ser = Series([10, 11, 12, 13])
+
+        expected = Series(
+            [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
+            index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
+        )
+        # interpolate at new_index
+        new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
+            float
+        )
+        interp_s = ser.reindex(new_index).interpolate(method="akima")
+        tm.assert_series_equal(interp_s[1:3], expected)
+
+    @td.skip_if_no_scipy
+    def test_interpolate_piecewise_polynomial(self):
+        ser = Series([10, 11, 12, 13])
+
+        expected = Series(
+            [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
+            index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
+        )
+        # interpolate at new_index
+        new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
+            float
+        )
+        interp_s = ser.reindex(new_index).interpolate(method="piecewise_polynomial")
+        tm.assert_series_equal(interp_s[1:3], expected)
+
+    @td.skip_if_no_scipy
+    def test_interpolate_from_derivatives(self):
+        ser = Series([10, 11, 12, 13])
+
+        expected = Series(
+            [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
+            index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
+        )
+        # interpolate at new_index
+        new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
+            float
+        )
+        interp_s = ser.reindex(new_index).interpolate(method="from_derivatives")
+        tm.assert_series_equal(interp_s[1:3], expected)
+
+    @pytest.mark.parametrize(
+        "kwargs",
+        [
+            {},
+            pytest.param(
+                {"method": "polynomial", "order": 1}, marks=td.skip_if_no_scipy
+            ),
+        ],
+    )
+    def test_interpolate_corners(self, kwargs):
+        s = Series([np.nan, np.nan])
+        tm.assert_series_equal(s.interpolate(**kwargs), s)
+
+        s = Series([], dtype=object).interpolate()
+        tm.assert_series_equal(s.interpolate(**kwargs), s)
+
+    def test_interpolate_index_values(self):
+        s = Series(np.nan, index=np.sort(np.random.rand(30)))
+        s[::3] = np.random.randn(10)
+
+        vals = s.index.values.astype(float)
+
+        result = s.interpolate(method="index")
+
+        expected = s.copy()
+        bad = isna(expected.values)
+        good = ~bad
+        expected = Series(
+            np.interp(vals[bad], vals[good], s.values[good]), index=s.index[bad]
+        )
+
+        tm.assert_series_equal(result[bad], expected)
+
+        # 'values' is synonymous with 'index' for the method kwarg
+        other_result = s.interpolate(method="values")
+
+        tm.assert_series_equal(other_result, result)
+        tm.assert_series_equal(other_result[bad], expected)
+
+    def test_interpolate_non_ts(self):
+        s = Series([1, 3, np.nan, np.nan, np.nan, 11])
+        msg = (
+            "time-weighted interpolation only works on Series or DataFrames "
+            "with a DatetimeIndex"
+        )
+        with pytest.raises(ValueError, match=msg):
+            s.interpolate(method="time")
+
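+    # the default (linear) method and polynomial order 1 should agree on this data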
+    @pytest.mark.parametrize(
+        "kwargs",
+        [
+            {},
+            pytest.param(
+                {"method": "polynomial", "order": 1}, marks=td.skip_if_no_scipy
+            ),
+        ],
+    )
+    def test_nan_interpolate(self, kwargs):
+        s = Series([0, 1, np.nan, 3])
+        result = s.interpolate(**kwargs)
+        expected = Series([0.0, 1.0, 2.0, 3.0])
+        tm.assert_series_equal(result, expected)
+
+    def test_nan_irregular_index(self):
+        s = Series([1, 2, np.nan, 4], index=[1, 3, 5, 9])
+        result = s.interpolate()
+        expected = Series([1.0, 2.0, 3.0, 4.0], index=[1, 3, 5, 9])
+        tm.assert_series_equal(result, expected)
+
+    def test_nan_str_index(self):
+        s = Series([0, 1, 2, np.nan], index=list("abcd"))
+        result = s.interpolate()
+        expected = Series([0.0, 1.0, 2.0, 2.0], index=list("abcd"))
+        tm.assert_series_equal(result, expected)
+
+    @td.skip_if_no_scipy
+    def test_interp_quad(self):
+        sq = Series([1, 4, np.nan, 16], index=[1, 2, 3, 4])
+        result = sq.interpolate(method="quadratic")
+        expected = Series([1.0, 4.0, 9.0, 16.0], index=[1, 2, 3, 4])
+        tm.assert_series_equal(result, expected)
+
+    @td.skip_if_no_scipy
+    def test_interp_scipy_basic(self):
+        s = Series([1, 3, np.nan, 12, np.nan, 25])
+        # slinear
+        expected = Series([1.0, 3.0, 7.5, 12.0, 18.5, 25.0])
+        result = s.interpolate(method="slinear")
+        tm.assert_series_equal(result, expected)
+
+        result = s.interpolate(method="slinear", downcast="infer")
+        tm.assert_series_equal(result, expected)
+        # nearest
+        expected = Series([1, 3, 3, 12, 12, 25])
+        result = s.interpolate(method="nearest")
+        tm.assert_series_equal(result, expected.astype("float"))
+
+        result = s.interpolate(method="nearest", downcast="infer")
+        tm.assert_series_equal(result, expected)
+        # zero
+        expected = Series([1, 3, 3, 12, 12, 25])
+        result = s.interpolate(method="zero")
+        tm.assert_series_equal(result, expected.astype("float"))
+
+        result = s.interpolate(method="zero", downcast="infer")
+        tm.assert_series_equal(result, expected)
+        # quadratic
+        # GH #15662.
+        expected = Series([1, 3.0, 6.823529, 12.0, 18.058824, 25.0])
+        result = s.interpolate(method="quadratic")
+        tm.assert_series_equal(result, expected)
+
+        result = s.interpolate(method="quadratic", downcast="infer")
+        tm.assert_series_equal(result, expected)
+        # cubic
+        expected = Series([1.0, 3.0, 6.8, 12.0, 18.2, 25.0])
+        result = s.interpolate(method="cubic")
+        tm.assert_series_equal(result, expected)
+
+    def test_interp_limit(self):
+        s = Series([1, 3, np.nan, np.nan, np.nan, 11])
+
+        expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0])
+        result = s.interpolate(method="linear", limit=2)
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize("limit", [-1, 0])
+    def test_interpolate_invalid_nonpositive_limit(self, nontemporal_method, limit):
+        # GH 9217: make sure limit is greater than zero.
+        s = pd.Series([1, 2, np.nan, 4])
+        method, kwargs = nontemporal_method
+        with pytest.raises(ValueError, match="Limit must be greater than 0"):
+            s.interpolate(limit=limit, method=method, **kwargs)
+
+    def test_interpolate_invalid_float_limit(self, nontemporal_method):
+        # GH 9217: make sure limit is an integer.
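+        # even an integral float like 2.0 must be rejected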
+        s = pd.Series([1, 2, np.nan, 4])
+        method, kwargs = nontemporal_method
+        limit = 2.0
+        with pytest.raises(ValueError, match="Limit must be an integer"):
+            s.interpolate(limit=limit, method=method, **kwargs)
+
+    @pytest.mark.parametrize("invalid_method", [None, "nonexistent_method"])
+    def test_interp_invalid_method(self, invalid_method):
+        s = Series([1, 3, np.nan, 12, np.nan, 25])
+
+        msg = f"method must be one of.* Got '{invalid_method}' instead"
+        with pytest.raises(ValueError, match=msg):
+            s.interpolate(method=invalid_method)
+
+        # When an invalid method and invalid limit (such as -1) are
+        # provided, the error message reflects the invalid method.
+        with pytest.raises(ValueError, match=msg):
+            s.interpolate(method=invalid_method, limit=-1)
+
+    def test_interp_limit_forward(self):
+        s = Series([1, 3, np.nan, np.nan, np.nan, 11])
+
+        # Provide 'forward' (the default) explicitly here.
+        expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0])
+
+        result = s.interpolate(method="linear", limit=2, limit_direction="forward")
+        tm.assert_series_equal(result, expected)
+
+        result = s.interpolate(method="linear", limit=2, limit_direction="FORWARD")
+        tm.assert_series_equal(result, expected)
+
+    def test_interp_unlimited(self):
+        # these test are for issue #16282 default Limit=None is unlimited
+        s = Series([np.nan, 1.0, 3.0, np.nan, np.nan, np.nan, 11.0, np.nan])
+        expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0])
+        result = s.interpolate(method="linear", limit_direction="both")
+        tm.assert_series_equal(result, expected)
+
+        expected = Series([np.nan, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0])
+        result = s.interpolate(method="linear", limit_direction="forward")
+        tm.assert_series_equal(result, expected)
+
+        expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, np.nan])
+        result = s.interpolate(method="linear", limit_direction="backward")
+        tm.assert_series_equal(result, expected)
+
+    def test_interp_limit_bad_direction(self):
+        s = Series([1, 3, np.nan, np.nan, np.nan, 11])
+
+        msg = (
+            r"Invalid limit_direction: expecting one of \['forward', "
+            r"'backward', 'both'\], got 'abc'"
+        )
+        with pytest.raises(ValueError, match=msg):
+            s.interpolate(method="linear", limit=2, limit_direction="abc")
+
+        # raises an error even if no limit is specified.
+        with pytest.raises(ValueError, match=msg):
+            s.interpolate(method="linear", limit_direction="abc")
+
+    # limit_area introduced GH #16284
+    def test_interp_limit_area(self):
+        # These tests are for issue #9218 -- fill NaNs in both directions.
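+        # limit_area="inside" fills only NaNs bracketed by valid values,
+        # limit_area="outside" fills only the leading/trailing NaNs.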
+        s = Series([np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan])
+
+        expected = Series([np.nan, np.nan, 3.0, 4.0, 5.0, 6.0, 7.0, np.nan, np.nan])
+        result = s.interpolate(method="linear", limit_area="inside")
+        tm.assert_series_equal(result, expected)
+
+        expected = Series(
+            [np.nan, np.nan, 3.0, 4.0, np.nan, np.nan, 7.0, np.nan, np.nan]
+        )
+        result = s.interpolate(method="linear", limit_area="inside", limit=1)
+        tm.assert_series_equal(result, expected)
+
+        expected = Series([np.nan, np.nan, 3.0, 4.0, np.nan, 6.0, 7.0, np.nan, np.nan])
+        result = s.interpolate(
+            method="linear", limit_area="inside", limit_direction="both", limit=1
+        )
+        tm.assert_series_equal(result, expected)
+
+        expected = Series([np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0])
+        result = s.interpolate(method="linear", limit_area="outside")
+        tm.assert_series_equal(result, expected)
+
+        expected = Series(
+            [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan]
+        )
+        result = s.interpolate(method="linear", limit_area="outside", limit=1)
+        tm.assert_series_equal(result, expected)
+
+        expected = Series([np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan])
+        result = s.interpolate(
+            method="linear", limit_area="outside", limit_direction="both", limit=1
+        )
+        tm.assert_series_equal(result, expected)
+
+        expected = Series([3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan])
+        result = s.interpolate(
+            method="linear", limit_area="outside", limit_direction="backward"
+        )
+        tm.assert_series_equal(result, expected)
+
+        # raises an error even if limit type is wrong.
+        msg = r"Invalid limit_area: expecting one of \['inside', 'outside'\], got abc"
+        with pytest.raises(ValueError, match=msg):
+            s.interpolate(method="linear", limit_area="abc")
+
+    def test_interp_limit_direction(self):
+        # These tests are for issue #9218 -- fill NaNs in both directions.
+        s = Series([1, 3, np.nan, np.nan, np.nan, 11])
+
+        expected = Series([1.0, 3.0, np.nan, 7.0, 9.0, 11.0])
+        result = s.interpolate(method="linear", limit=2, limit_direction="backward")
+        tm.assert_series_equal(result, expected)
+
+        expected = Series([1.0, 3.0, 5.0, np.nan, 9.0, 11.0])
+        result = s.interpolate(method="linear", limit=1, limit_direction="both")
+        tm.assert_series_equal(result, expected)
+
+        # Check that this works on a longer series of nans.
+        s = Series([1, 3, np.nan, np.nan, np.nan, 7, 9, np.nan, np.nan, 12, np.nan])
+
+        expected = Series([1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0])
+        result = s.interpolate(method="linear", limit=2, limit_direction="both")
+        tm.assert_series_equal(result, expected)
+
+        expected = Series(
+            [1.0, 3.0, 4.0, np.nan, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0]
+        )
+        result = s.interpolate(method="linear", limit=1, limit_direction="both")
+        tm.assert_series_equal(result, expected)
+
+    def test_interp_limit_to_ends(self):
+        # These test are for issue #10420 -- flow back to beginning.
+        s = Series([np.nan, np.nan, 5, 7, 9, np.nan])
+
+        expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, np.nan])
+        result = s.interpolate(method="linear", limit=2, limit_direction="backward")
+        tm.assert_series_equal(result, expected)
+
+        expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, 9.0])
+        result = s.interpolate(method="linear", limit=2, limit_direction="both")
+        tm.assert_series_equal(result, expected)
+
+    def test_interp_limit_before_ends(self):
+        # These test are for issue #11115 -- limit ends properly.
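+        # with limit=1, only the NaN adjacent to valid data in the fill direction is filled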
+        s = Series([np.nan, np.nan, 5, 7, np.nan, np.nan])
+
+        expected = Series([np.nan, np.nan, 5.0, 7.0, 7.0, np.nan])
+        result = s.interpolate(method="linear", limit=1, limit_direction="forward")
+        tm.assert_series_equal(result, expected)
+
+        expected = Series([np.nan, 5.0, 5.0, 7.0, np.nan, np.nan])
+        result = s.interpolate(method="linear", limit=1, limit_direction="backward")
+        tm.assert_series_equal(result, expected)
+
+        expected = Series([np.nan, 5.0, 5.0, 7.0, 7.0, np.nan])
+        result = s.interpolate(method="linear", limit=1, limit_direction="both")
+        tm.assert_series_equal(result, expected)
+
+    @td.skip_if_no_scipy
+    def test_interp_all_good(self):
+        s = Series([1, 2, 3])
+        result = s.interpolate(method="polynomial", order=1)
+        tm.assert_series_equal(result, s)
+
+        # non-scipy
+        result = s.interpolate()
+        tm.assert_series_equal(result, s)
+
+    @pytest.mark.parametrize(
+        "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)]
+    )
+    def test_interp_multiIndex(self, check_scipy):
+        idx = MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c")])
+        s = Series([1, 2, np.nan], index=idx)
+
+        expected = s.copy()
+        expected.loc[2] = 2
+        result = s.interpolate()
+        tm.assert_series_equal(result, expected)
+
+        msg = "Only `method=linear` interpolation is supported on MultiIndexes"
+        if check_scipy:
+            with pytest.raises(ValueError, match=msg):
+                s.interpolate(method="polynomial", order=1)
+
+    @td.skip_if_no_scipy
+    def test_interp_nonmono_raise(self):
+        s = Series([1, np.nan, 3], index=[0, 2, 1])
+        msg = "krogh interpolation requires that the index be monotonic"
+        with pytest.raises(ValueError, match=msg):
+            s.interpolate(method="krogh")
+
+    @td.skip_if_no_scipy
+    @pytest.mark.parametrize("method", ["nearest", "pad"])
+    def test_interp_datetime64(self, method, tz_naive_fixture):
+        df = Series(
+            [1, np.nan, 3], index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture)
+        )
+        result = df.interpolate(method=method)
+        expected = Series(
+            [1.0, 1.0, 3.0],
+            index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture),
+        )
+        tm.assert_series_equal(result, expected)
+
+    def test_interp_pad_datetime64tz_values(self):
+        # GH#27628 missing.interpolate_2d should handle datetimetz values
+        dti = pd.date_range("2015-04-05", periods=3, tz="US/Central")
+        ser = pd.Series(dti)
+        ser[1] = pd.NaT
+        result = ser.interpolate(method="pad")
+
+        expected = pd.Series(dti)
+        expected[1] = expected[0]
+        tm.assert_series_equal(result, expected)
+
+    def test_interp_limit_no_nans(self):
+        # GH 7173
+        s = pd.Series([1.0, 2.0, 3.0])
+        result = s.interpolate(limit=1)
+        expected = s
+        tm.assert_series_equal(result, expected)
+
+    @td.skip_if_no_scipy
+    @pytest.mark.parametrize("method", ["polynomial", "spline"])
+    def test_no_order(self, method):
+        # see GH-10633, GH-24014
+        s = Series([0, 1, np.nan, 3])
+        msg = "You must specify the order of the spline or polynomial"
+        with pytest.raises(ValueError, match=msg):
+            s.interpolate(method=method)
+
+    @td.skip_if_no_scipy
+    @pytest.mark.parametrize("order", [-1, -1.0, 0, 0.0, np.nan])
+    def test_interpolate_spline_invalid_order(self, order):
+        s = Series([0, 1, np.nan, 3])
+        msg = "order needs to be specified and greater than 0"
+        with pytest.raises(ValueError, match=msg):
+            s.interpolate(method="spline", order=order)
+
+    @td.skip_if_no_scipy
+    def test_spline(self):
+        s = Series([1, 2, np.nan, 4, 5, np.nan, 7])
+        result = s.interpolate(method="spline", order=1)
+        expected = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
+        tm.assert_series_equal(result, expected)
+
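+    # ext=3 clamps extrapolated values at the boundary value, ext=0 extends the spline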
+    @td.skip_if_no_scipy
+    def test_spline_extrapolate(self):
+        s = Series([1, 2, 3, 4, np.nan, 6, np.nan])
+        result3 = s.interpolate(method="spline", order=1, ext=3)
+        expected3 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0])
+        tm.assert_series_equal(result3, expected3)
+
+        result1 = s.interpolate(method="spline", order=1, ext=0)
+        expected1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
+        tm.assert_series_equal(result1, expected1)
+
+    @td.skip_if_no_scipy
+    def test_spline_smooth(self):
+        s = Series([1, 2, np.nan, 4, 5.1, np.nan, 7])
+        assert (
+            s.interpolate(method="spline", order=3, s=0)[5]
+            != s.interpolate(method="spline", order=3)[5]
+        )
+
+    @td.skip_if_no_scipy
+    def test_spline_interpolation(self):
+        s = Series(np.arange(10) ** 2)
+        s[np.random.randint(0, 9, 3)] = np.nan
+        result1 = s.interpolate(method="spline", order=1)
+        expected1 = s.interpolate(method="spline", order=1)
+        tm.assert_series_equal(result1, expected1)
+
+    def test_interp_timedelta64(self):
+        # GH 6424
+        df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 3]))
+        result = df.interpolate(method="time")
+        expected = Series([1.0, 2.0, 3.0], index=pd.to_timedelta([1, 2, 3]))
+        tm.assert_series_equal(result, expected)
+
+        # test for non uniform spacing
+        df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 4]))
+        result = df.interpolate(method="time")
+        expected = Series([1.0, 1.666667, 3.0], index=pd.to_timedelta([1, 2, 4]))
+        tm.assert_series_equal(result, expected)
+
+    def test_series_interpolate_method_values(self):
+        # GH#1646
+        rng = date_range("1/1/2000", "1/20/2000", freq="D")
+        ts = Series(np.random.randn(len(rng)), index=rng)
+
+        ts[::2] = np.nan
+
+        result = ts.interpolate(method="values")
+        exp = ts.interpolate()
+        tm.assert_series_equal(result, exp)
+
+    def test_series_interpolate_intraday(self):
+        # #1698
+        index = pd.date_range("1/1/2012", periods=4, freq="12D")
+        ts = pd.Series([0, 12, 24, 36], index)
+        new_index = index.append(index + pd.DateOffset(days=1)).sort_values()
+
+        exp = ts.reindex(new_index).interpolate(method="time")
+
+        index = pd.date_range("1/1/2012", periods=4, freq="12H")
+        ts = pd.Series([0, 12, 24, 36], index)
+        new_index = index.append(index + pd.DateOffset(hours=1)).sort_values()
+        result = ts.reindex(new_index).interpolate(method="time")
+
+        tm.assert_numpy_array_equal(result.values, exp.values)
+
+    @pytest.mark.parametrize(
+        "ind",
+        [
+            ["a", "b", "c", "d"],
+            pd.period_range(start="2019-01-01", periods=4),
+            pd.interval_range(start=0, end=4),
+        ],
+    )
+    def test_interp_non_timedelta_index(self, interp_methods_ind, ind):
+        # gh 21662
+        df = pd.DataFrame([0, 1, np.nan, 3], index=ind)
+
+        method, kwargs = interp_methods_ind
+        if method == "pchip":
+            pytest.importorskip("scipy")
+
+        if method == "linear":
+            result = df[0].interpolate(**kwargs)
+            expected = pd.Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind)
+            tm.assert_series_equal(result, expected)
+        else:
+            expected_error = (
+                "Index column must be numeric or datetime type when "
+                f"using {method} method other than linear. "
+                "Try setting a numeric or datetime index column before "
+                "interpolating."
+            )
+            with pytest.raises(ValueError, match=expected_error):
+                df[0].interpolate(method=method, **kwargs)
+
+    def test_interpolate_timedelta_index(self, interp_methods_ind):
+        """
+        Tests for non numerical index types - object, period, timedelta
+        Note that all methods except time, index, nearest and values
+        are tested here.
+ """ + # gh 21662 + ind = pd.timedelta_range(start=1, periods=4) + df = pd.DataFrame([0, 1, np.nan, 3], index=ind) + + method, kwargs = interp_methods_ind + if method == "pchip": + pytest.importorskip("scipy") + + if method in {"linear", "pchip"}: + result = df[0].interpolate(method=method, **kwargs) + expected = pd.Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind) + tm.assert_series_equal(result, expected) + else: + pytest.skip( + "This interpolation method is not supported for Timedelta Index yet." + ) + + @pytest.mark.parametrize( + "ascending, expected_values", + [(True, [1, 2, 3, 9, 10]), (False, [10, 9, 3, 2, 1])], + ) + def test_interpolate_unsorted_index(self, ascending, expected_values): + # GH 21037 + ts = pd.Series(data=[10, 9, np.nan, 2, 1], index=[10, 9, 3, 2, 1]) + result = ts.sort_index(ascending=ascending).interpolate(method="index") + expected = pd.Series(data=expected_values, index=expected_values, dtype=float) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py index 4c817ed2e2d59..1566d8f36373b 100644 --- a/pandas/tests/series/test_internals.py +++ b/pandas/tests/series/test_internals.py @@ -169,6 +169,7 @@ def test_convert(self): result = s._convert(datetime=True, coerce=True) tm.assert_series_equal(result, s) + # FIXME: dont leave commented-out # r = s.copy() # r[0] = np.nan # result = r._convert(convert_dates=True,convert_numeric=False) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 6b7d9e00a5228..bac005465034f 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -5,7 +5,6 @@ import pytz from pandas._libs.tslib import iNaT -import pandas.util._test_decorators as td import pandas as pd from pandas import ( @@ -13,7 +12,6 @@ DataFrame, Index, IntervalIndex, - MultiIndex, NaT, Series, Timedelta, @@ -24,11 +22,6 @@ import pandas._testing as tm -def _simple_ts(start, end, freq="D"): - rng = date_range(start, end, freq=freq) - return Series(np.random.randn(len(rng)), index=rng) - - class TestSeriesMissingData: def test_timedelta_fillna(self): # GH 3371 @@ -988,666 +981,3 @@ def test_series_pad_backfill_limit(self): expected = s[-2:].reindex(index).fillna(method="backfill") expected[:3] = np.nan tm.assert_series_equal(result, expected) - - -@pytest.fixture( - params=[ - "linear", - "index", - "values", - "nearest", - "slinear", - "zero", - "quadratic", - "cubic", - "barycentric", - "krogh", - "polynomial", - "spline", - "piecewise_polynomial", - "from_derivatives", - "pchip", - "akima", - ] -) -def nontemporal_method(request): - """ Fixture that returns an (method name, required kwargs) pair. - - This fixture does not include method 'time' as a parameterization; that - method requires a Series with a DatetimeIndex, and is generally tested - separately from these non-temporal methods. - """ - method = request.param - kwargs = dict(order=1) if method in ("spline", "polynomial") else dict() - return method, kwargs - - -@pytest.fixture( - params=[ - "linear", - "slinear", - "zero", - "quadratic", - "cubic", - "barycentric", - "krogh", - "polynomial", - "spline", - "piecewise_polynomial", - "from_derivatives", - "pchip", - "akima", - ] -) -def interp_methods_ind(request): - """ Fixture that returns a (method name, required kwargs) pair to - be tested for various Index types. 
- - This fixture does not include methods - 'time', 'index', 'nearest', - 'values' as a parameterization - """ - method = request.param - kwargs = dict(order=1) if method in ("spline", "polynomial") else dict() - return method, kwargs - - -class TestSeriesInterpolateData: - def test_interpolate(self, datetime_series, string_series): - ts = Series(np.arange(len(datetime_series), dtype=float), datetime_series.index) - - ts_copy = ts.copy() - ts_copy[5:10] = np.NaN - - linear_interp = ts_copy.interpolate(method="linear") - tm.assert_series_equal(linear_interp, ts) - - ord_ts = Series( - [d.toordinal() for d in datetime_series.index], index=datetime_series.index - ).astype(float) - - ord_ts_copy = ord_ts.copy() - ord_ts_copy[5:10] = np.NaN - - time_interp = ord_ts_copy.interpolate(method="time") - tm.assert_series_equal(time_interp, ord_ts) - - def test_interpolate_time_raises_for_non_timeseries(self): - # When method='time' is used on a non-TimeSeries that contains a null - # value, a ValueError should be raised. - non_ts = Series([0, 1, 2, np.NaN]) - msg = "time-weighted interpolation only works on Series.* with a DatetimeIndex" - with pytest.raises(ValueError, match=msg): - non_ts.interpolate(method="time") - - @td.skip_if_no_scipy - def test_interpolate_pchip(self): - - ser = Series(np.sort(np.random.uniform(size=100))) - - # interpolate at new_index - new_index = ser.index.union( - Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75]) - ).astype(float) - interp_s = ser.reindex(new_index).interpolate(method="pchip") - # does not blow up, GH5977 - interp_s[49:51] - - @td.skip_if_no_scipy - def test_interpolate_akima(self): - - ser = Series([10, 11, 12, 13]) - - expected = Series( - [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], - index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), - ) - # interpolate at new_index - new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( - float - ) - interp_s = ser.reindex(new_index).interpolate(method="akima") - tm.assert_series_equal(interp_s[1:3], expected) - - @td.skip_if_no_scipy - def test_interpolate_piecewise_polynomial(self): - ser = Series([10, 11, 12, 13]) - - expected = Series( - [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], - index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), - ) - # interpolate at new_index - new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( - float - ) - interp_s = ser.reindex(new_index).interpolate(method="piecewise_polynomial") - tm.assert_series_equal(interp_s[1:3], expected) - - @td.skip_if_no_scipy - def test_interpolate_from_derivatives(self): - ser = Series([10, 11, 12, 13]) - - expected = Series( - [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], - index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), - ) - # interpolate at new_index - new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( - float - ) - interp_s = ser.reindex(new_index).interpolate(method="from_derivatives") - tm.assert_series_equal(interp_s[1:3], expected) - - @pytest.mark.parametrize( - "kwargs", - [ - {}, - pytest.param( - {"method": "polynomial", "order": 1}, marks=td.skip_if_no_scipy - ), - ], - ) - def test_interpolate_corners(self, kwargs): - s = Series([np.nan, np.nan]) - tm.assert_series_equal(s.interpolate(**kwargs), s) - - s = Series([], dtype=object).interpolate() - tm.assert_series_equal(s.interpolate(**kwargs), s) - - def test_interpolate_index_values(self): - s = Series(np.nan, 
index=np.sort(np.random.rand(30))) - s[::3] = np.random.randn(10) - - vals = s.index.values.astype(float) - - result = s.interpolate(method="index") - - expected = s.copy() - bad = isna(expected.values) - good = ~bad - expected = Series( - np.interp(vals[bad], vals[good], s.values[good]), index=s.index[bad] - ) - - tm.assert_series_equal(result[bad], expected) - - # 'values' is synonymous with 'index' for the method kwarg - other_result = s.interpolate(method="values") - - tm.assert_series_equal(other_result, result) - tm.assert_series_equal(other_result[bad], expected) - - def test_interpolate_non_ts(self): - s = Series([1, 3, np.nan, np.nan, np.nan, 11]) - msg = ( - "time-weighted interpolation only works on Series or DataFrames " - "with a DatetimeIndex" - ) - with pytest.raises(ValueError, match=msg): - s.interpolate(method="time") - - @pytest.mark.parametrize( - "kwargs", - [ - {}, - pytest.param( - {"method": "polynomial", "order": 1}, marks=td.skip_if_no_scipy - ), - ], - ) - def test_nan_interpolate(self, kwargs): - s = Series([0, 1, np.nan, 3]) - result = s.interpolate(**kwargs) - expected = Series([0.0, 1.0, 2.0, 3.0]) - tm.assert_series_equal(result, expected) - - def test_nan_irregular_index(self): - s = Series([1, 2, np.nan, 4], index=[1, 3, 5, 9]) - result = s.interpolate() - expected = Series([1.0, 2.0, 3.0, 4.0], index=[1, 3, 5, 9]) - tm.assert_series_equal(result, expected) - - def test_nan_str_index(self): - s = Series([0, 1, 2, np.nan], index=list("abcd")) - result = s.interpolate() - expected = Series([0.0, 1.0, 2.0, 2.0], index=list("abcd")) - tm.assert_series_equal(result, expected) - - @td.skip_if_no_scipy - def test_interp_quad(self): - sq = Series([1, 4, np.nan, 16], index=[1, 2, 3, 4]) - result = sq.interpolate(method="quadratic") - expected = Series([1.0, 4.0, 9.0, 16.0], index=[1, 2, 3, 4]) - tm.assert_series_equal(result, expected) - - @td.skip_if_no_scipy - def test_interp_scipy_basic(self): - s = Series([1, 3, np.nan, 12, np.nan, 25]) - # slinear - expected = Series([1.0, 3.0, 7.5, 12.0, 18.5, 25.0]) - result = s.interpolate(method="slinear") - tm.assert_series_equal(result, expected) - - result = s.interpolate(method="slinear", downcast="infer") - tm.assert_series_equal(result, expected) - # nearest - expected = Series([1, 3, 3, 12, 12, 25]) - result = s.interpolate(method="nearest") - tm.assert_series_equal(result, expected.astype("float")) - - result = s.interpolate(method="nearest", downcast="infer") - tm.assert_series_equal(result, expected) - # zero - expected = Series([1, 3, 3, 12, 12, 25]) - result = s.interpolate(method="zero") - tm.assert_series_equal(result, expected.astype("float")) - - result = s.interpolate(method="zero", downcast="infer") - tm.assert_series_equal(result, expected) - # quadratic - # GH #15662. 
- expected = Series([1, 3.0, 6.823529, 12.0, 18.058824, 25.0]) - result = s.interpolate(method="quadratic") - tm.assert_series_equal(result, expected) - - result = s.interpolate(method="quadratic", downcast="infer") - tm.assert_series_equal(result, expected) - # cubic - expected = Series([1.0, 3.0, 6.8, 12.0, 18.2, 25.0]) - result = s.interpolate(method="cubic") - tm.assert_series_equal(result, expected) - - def test_interp_limit(self): - s = Series([1, 3, np.nan, np.nan, np.nan, 11]) - - expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0]) - result = s.interpolate(method="linear", limit=2) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("limit", [-1, 0]) - def test_interpolate_invalid_nonpositive_limit(self, nontemporal_method, limit): - # GH 9217: make sure limit is greater than zero. - s = pd.Series([1, 2, np.nan, 4]) - method, kwargs = nontemporal_method - with pytest.raises(ValueError, match="Limit must be greater than 0"): - s.interpolate(limit=limit, method=method, **kwargs) - - def test_interpolate_invalid_float_limit(self, nontemporal_method): - # GH 9217: make sure limit is an integer. - s = pd.Series([1, 2, np.nan, 4]) - method, kwargs = nontemporal_method - limit = 2.0 - with pytest.raises(ValueError, match="Limit must be an integer"): - s.interpolate(limit=limit, method=method, **kwargs) - - @pytest.mark.parametrize("invalid_method", [None, "nonexistent_method"]) - def test_interp_invalid_method(self, invalid_method): - s = Series([1, 3, np.nan, 12, np.nan, 25]) - - msg = f"method must be one of.* Got '{invalid_method}' instead" - with pytest.raises(ValueError, match=msg): - s.interpolate(method=invalid_method) - - # When an invalid method and invalid limit (such as -1) are - # provided, the error message reflects the invalid method. - with pytest.raises(ValueError, match=msg): - s.interpolate(method=invalid_method, limit=-1) - - def test_interp_limit_forward(self): - s = Series([1, 3, np.nan, np.nan, np.nan, 11]) - - # Provide 'forward' (the default) explicitly here. - expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0]) - - result = s.interpolate(method="linear", limit=2, limit_direction="forward") - tm.assert_series_equal(result, expected) - - result = s.interpolate(method="linear", limit=2, limit_direction="FORWARD") - tm.assert_series_equal(result, expected) - - def test_interp_unlimited(self): - # these test are for issue #16282 default Limit=None is unlimited - s = Series([np.nan, 1.0, 3.0, np.nan, np.nan, np.nan, 11.0, np.nan]) - expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0]) - result = s.interpolate(method="linear", limit_direction="both") - tm.assert_series_equal(result, expected) - - expected = Series([np.nan, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0]) - result = s.interpolate(method="linear", limit_direction="forward") - tm.assert_series_equal(result, expected) - - expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, np.nan]) - result = s.interpolate(method="linear", limit_direction="backward") - tm.assert_series_equal(result, expected) - - def test_interp_limit_bad_direction(self): - s = Series([1, 3, np.nan, np.nan, np.nan, 11]) - - msg = ( - r"Invalid limit_direction: expecting one of \['forward', " - r"'backward', 'both'\], got 'abc'" - ) - with pytest.raises(ValueError, match=msg): - s.interpolate(method="linear", limit=2, limit_direction="abc") - - # raises an error even if no limit is specified. 
- with pytest.raises(ValueError, match=msg): - s.interpolate(method="linear", limit_direction="abc") - - # limit_area introduced GH #16284 - def test_interp_limit_area(self): - # These tests are for issue #9218 -- fill NaNs in both directions. - s = Series([np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) - - expected = Series([np.nan, np.nan, 3.0, 4.0, 5.0, 6.0, 7.0, np.nan, np.nan]) - result = s.interpolate(method="linear", limit_area="inside") - tm.assert_series_equal(result, expected) - - expected = Series( - [np.nan, np.nan, 3.0, 4.0, np.nan, np.nan, 7.0, np.nan, np.nan] - ) - result = s.interpolate(method="linear", limit_area="inside", limit=1) - tm.assert_series_equal(result, expected) - - expected = Series([np.nan, np.nan, 3.0, 4.0, np.nan, 6.0, 7.0, np.nan, np.nan]) - result = s.interpolate( - method="linear", limit_area="inside", limit_direction="both", limit=1 - ) - tm.assert_series_equal(result, expected) - - expected = Series([np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0]) - result = s.interpolate(method="linear", limit_area="outside") - tm.assert_series_equal(result, expected) - - expected = Series( - [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan] - ) - result = s.interpolate(method="linear", limit_area="outside", limit=1) - tm.assert_series_equal(result, expected) - - expected = Series([np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan]) - result = s.interpolate( - method="linear", limit_area="outside", limit_direction="both", limit=1 - ) - tm.assert_series_equal(result, expected) - - expected = Series([3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan]) - result = s.interpolate( - method="linear", limit_area="outside", limit_direction="backward" - ) - tm.assert_series_equal(result, expected) - - # raises an error even if limit type is wrong. - msg = r"Invalid limit_area: expecting one of \['inside', 'outside'\], got abc" - with pytest.raises(ValueError, match=msg): - s.interpolate(method="linear", limit_area="abc") - - def test_interp_limit_direction(self): - # These tests are for issue #9218 -- fill NaNs in both directions. - s = Series([1, 3, np.nan, np.nan, np.nan, 11]) - - expected = Series([1.0, 3.0, np.nan, 7.0, 9.0, 11.0]) - result = s.interpolate(method="linear", limit=2, limit_direction="backward") - tm.assert_series_equal(result, expected) - - expected = Series([1.0, 3.0, 5.0, np.nan, 9.0, 11.0]) - result = s.interpolate(method="linear", limit=1, limit_direction="both") - tm.assert_series_equal(result, expected) - - # Check that this works on a longer series of nans. - s = Series([1, 3, np.nan, np.nan, np.nan, 7, 9, np.nan, np.nan, 12, np.nan]) - - expected = Series([1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0]) - result = s.interpolate(method="linear", limit=2, limit_direction="both") - tm.assert_series_equal(result, expected) - - expected = Series( - [1.0, 3.0, 4.0, np.nan, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0] - ) - result = s.interpolate(method="linear", limit=1, limit_direction="both") - tm.assert_series_equal(result, expected) - - def test_interp_limit_to_ends(self): - # These test are for issue #10420 -- flow back to beginning. 
- s = Series([np.nan, np.nan, 5, 7, 9, np.nan]) - - expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, np.nan]) - result = s.interpolate(method="linear", limit=2, limit_direction="backward") - tm.assert_series_equal(result, expected) - - expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, 9.0]) - result = s.interpolate(method="linear", limit=2, limit_direction="both") - tm.assert_series_equal(result, expected) - - def test_interp_limit_before_ends(self): - # These test are for issue #11115 -- limit ends properly. - s = Series([np.nan, np.nan, 5, 7, np.nan, np.nan]) - - expected = Series([np.nan, np.nan, 5.0, 7.0, 7.0, np.nan]) - result = s.interpolate(method="linear", limit=1, limit_direction="forward") - tm.assert_series_equal(result, expected) - - expected = Series([np.nan, 5.0, 5.0, 7.0, np.nan, np.nan]) - result = s.interpolate(method="linear", limit=1, limit_direction="backward") - tm.assert_series_equal(result, expected) - - expected = Series([np.nan, 5.0, 5.0, 7.0, 7.0, np.nan]) - result = s.interpolate(method="linear", limit=1, limit_direction="both") - tm.assert_series_equal(result, expected) - - @td.skip_if_no_scipy - def test_interp_all_good(self): - s = Series([1, 2, 3]) - result = s.interpolate(method="polynomial", order=1) - tm.assert_series_equal(result, s) - - # non-scipy - result = s.interpolate() - tm.assert_series_equal(result, s) - - @pytest.mark.parametrize( - "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] - ) - def test_interp_multiIndex(self, check_scipy): - idx = MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c")]) - s = Series([1, 2, np.nan], index=idx) - - expected = s.copy() - expected.loc[2] = 2 - result = s.interpolate() - tm.assert_series_equal(result, expected) - - msg = "Only `method=linear` interpolation is supported on MultiIndexes" - if check_scipy: - with pytest.raises(ValueError, match=msg): - s.interpolate(method="polynomial", order=1) - - @td.skip_if_no_scipy - def test_interp_nonmono_raise(self): - s = Series([1, np.nan, 3], index=[0, 2, 1]) - msg = "krogh interpolation requires that the index be monotonic" - with pytest.raises(ValueError, match=msg): - s.interpolate(method="krogh") - - @td.skip_if_no_scipy - @pytest.mark.parametrize("method", ["nearest", "pad"]) - def test_interp_datetime64(self, method, tz_naive_fixture): - df = Series( - [1, np.nan, 3], index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture) - ) - result = df.interpolate(method=method) - expected = Series( - [1.0, 1.0, 3.0], - index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture), - ) - tm.assert_series_equal(result, expected) - - def test_interp_pad_datetime64tz_values(self): - # GH#27628 missing.interpolate_2d should handle datetimetz values - dti = pd.date_range("2015-04-05", periods=3, tz="US/Central") - ser = pd.Series(dti) - ser[1] = pd.NaT - result = ser.interpolate(method="pad") - - expected = pd.Series(dti) - expected[1] = expected[0] - tm.assert_series_equal(result, expected) - - def test_interp_limit_no_nans(self): - # GH 7173 - s = pd.Series([1.0, 2.0, 3.0]) - result = s.interpolate(limit=1) - expected = s - tm.assert_series_equal(result, expected) - - @td.skip_if_no_scipy - @pytest.mark.parametrize("method", ["polynomial", "spline"]) - def test_no_order(self, method): - # see GH-10633, GH-24014 - s = Series([0, 1, np.nan, 3]) - msg = "You must specify the order of the spline or polynomial" - with pytest.raises(ValueError, match=msg): - s.interpolate(method=method) - - @td.skip_if_no_scipy - @pytest.mark.parametrize("order", [-1, -1.0, 0, 0.0, 
np.nan]) - def test_interpolate_spline_invalid_order(self, order): - s = Series([0, 1, np.nan, 3]) - msg = "order needs to be specified and greater than 0" - with pytest.raises(ValueError, match=msg): - s.interpolate(method="spline", order=order) - - @td.skip_if_no_scipy - def test_spline(self): - s = Series([1, 2, np.nan, 4, 5, np.nan, 7]) - result = s.interpolate(method="spline", order=1) - expected = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]) - tm.assert_series_equal(result, expected) - - @td.skip_if_no_scipy - def test_spline_extrapolate(self): - s = Series([1, 2, 3, 4, np.nan, 6, np.nan]) - result3 = s.interpolate(method="spline", order=1, ext=3) - expected3 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0]) - tm.assert_series_equal(result3, expected3) - - result1 = s.interpolate(method="spline", order=1, ext=0) - expected1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]) - tm.assert_series_equal(result1, expected1) - - @td.skip_if_no_scipy - def test_spline_smooth(self): - s = Series([1, 2, np.nan, 4, 5.1, np.nan, 7]) - assert ( - s.interpolate(method="spline", order=3, s=0)[5] - != s.interpolate(method="spline", order=3)[5] - ) - - @td.skip_if_no_scipy - def test_spline_interpolation(self): - s = Series(np.arange(10) ** 2) - s[np.random.randint(0, 9, 3)] = np.nan - result1 = s.interpolate(method="spline", order=1) - expected1 = s.interpolate(method="spline", order=1) - tm.assert_series_equal(result1, expected1) - - def test_interp_timedelta64(self): - # GH 6424 - df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 3])) - result = df.interpolate(method="time") - expected = Series([1.0, 2.0, 3.0], index=pd.to_timedelta([1, 2, 3])) - tm.assert_series_equal(result, expected) - - # test for non uniform spacing - df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 4])) - result = df.interpolate(method="time") - expected = Series([1.0, 1.666667, 3.0], index=pd.to_timedelta([1, 2, 4])) - tm.assert_series_equal(result, expected) - - def test_series_interpolate_method_values(self): - # #1646 - ts = _simple_ts("1/1/2000", "1/20/2000") - ts[::2] = np.nan - - result = ts.interpolate(method="values") - exp = ts.interpolate() - tm.assert_series_equal(result, exp) - - def test_series_interpolate_intraday(self): - # #1698 - index = pd.date_range("1/1/2012", periods=4, freq="12D") - ts = pd.Series([0, 12, 24, 36], index) - new_index = index.append(index + pd.DateOffset(days=1)).sort_values() - - exp = ts.reindex(new_index).interpolate(method="time") - - index = pd.date_range("1/1/2012", periods=4, freq="12H") - ts = pd.Series([0, 12, 24, 36], index) - new_index = index.append(index + pd.DateOffset(hours=1)).sort_values() - result = ts.reindex(new_index).interpolate(method="time") - - tm.assert_numpy_array_equal(result.values, exp.values) - - @pytest.mark.parametrize( - "ind", - [ - ["a", "b", "c", "d"], - pd.period_range(start="2019-01-01", periods=4), - pd.interval_range(start=0, end=4), - ], - ) - def test_interp_non_timedelta_index(self, interp_methods_ind, ind): - # gh 21662 - df = pd.DataFrame([0, 1, np.nan, 3], index=ind) - - method, kwargs = interp_methods_ind - if method == "pchip": - pytest.importorskip("scipy") - - if method == "linear": - result = df[0].interpolate(**kwargs) - expected = pd.Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind) - tm.assert_series_equal(result, expected) - else: - expected_error = ( - "Index column must be numeric or datetime type when " - f"using {method} method other than linear. " - "Try setting a numeric or datetime index column before " - "interpolating." 
- ) - with pytest.raises(ValueError, match=expected_error): - df[0].interpolate(method=method, **kwargs) - - def test_interpolate_timedelta_index(self, interp_methods_ind): - """ - Tests for non numerical index types - object, period, timedelta - Note that all methods except time, index, nearest and values - are tested here. - """ - # gh 21662 - ind = pd.timedelta_range(start=1, periods=4) - df = pd.DataFrame([0, 1, np.nan, 3], index=ind) - - method, kwargs = interp_methods_ind - if method == "pchip": - pytest.importorskip("scipy") - - if method in {"linear", "pchip"}: - result = df[0].interpolate(method=method, **kwargs) - expected = pd.Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind) - tm.assert_series_equal(result, expected) - else: - pytest.skip( - "This interpolation method is not supported for Timedelta Index yet." - ) - - @pytest.mark.parametrize( - "ascending, expected_values", - [(True, [1, 2, 3, 9, 10]), (False, [10, 9, 3, 2, 1])], - ) - def test_interpolate_unsorted_index(self, ascending, expected_values): - # GH 21037 - ts = pd.Series(data=[10, 9, np.nan, 2, 1], index=[10, 9, 3, 2, 1]) - result = ts.sort_index(ascending=ascending).interpolate(method="index") - expected = pd.Series(data=expected_values, index=expected_values, dtype=float) - tm.assert_series_equal(result, expected) From 1825fa1f546cece1ee06063351b1be93ed43e7cf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 08:03:20 -0800 Subject: [PATCH 101/388] TST: method-specific files for DataFrame assign, interpolate (#32110) --- pandas/tests/frame/methods/test_assign.py | 82 +++++ .../tests/frame/methods/test_interpolate.py | 286 ++++++++++++++++++ pandas/tests/frame/test_missing.py | 280 ----------------- pandas/tests/frame/test_mutate_columns.py | 76 ----- 4 files changed, 368 insertions(+), 356 deletions(-) create mode 100644 pandas/tests/frame/methods/test_assign.py create mode 100644 pandas/tests/frame/methods/test_interpolate.py diff --git a/pandas/tests/frame/methods/test_assign.py b/pandas/tests/frame/methods/test_assign.py new file mode 100644 index 0000000000000..63b9f031de188 --- /dev/null +++ b/pandas/tests/frame/methods/test_assign.py @@ -0,0 +1,82 @@ +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +class TestAssign: + def test_assign(self): + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + original = df.copy() + result = df.assign(C=df.B / df.A) + expected = df.copy() + expected["C"] = [4, 2.5, 2] + tm.assert_frame_equal(result, expected) + + # lambda syntax + result = df.assign(C=lambda x: x.B / x.A) + tm.assert_frame_equal(result, expected) + + # original is unmodified + tm.assert_frame_equal(df, original) + + # Non-Series array-like + result = df.assign(C=[4, 2.5, 2]) + tm.assert_frame_equal(result, expected) + # original is unmodified + tm.assert_frame_equal(df, original) + + result = df.assign(B=df.B / df.A) + expected = expected.drop("B", axis=1).rename(columns={"C": "B"}) + tm.assert_frame_equal(result, expected) + + # overwrite + result = df.assign(A=df.A + df.B) + expected = df.copy() + expected["A"] = [5, 7, 9] + tm.assert_frame_equal(result, expected) + + # lambda + result = df.assign(A=lambda x: x.A + x.B) + tm.assert_frame_equal(result, expected) + + def test_assign_multiple(self): + df = DataFrame([[1, 4], [2, 5], [3, 6]], columns=["A", "B"]) + result = df.assign(C=[7, 8, 9], D=df.A, E=lambda x: x.B) + expected = DataFrame( + [[1, 4, 7, 1, 4], [2, 5, 8, 2, 5], [3, 6, 9, 3, 6]], columns=list("ABCDE") + ) + tm.assert_frame_equal(result, 
expected) + + def test_assign_order(self): + # GH 9818 + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + result = df.assign(D=df.A + df.B, C=df.A - df.B) + + expected = DataFrame([[1, 2, 3, -1], [3, 4, 7, -1]], columns=list("ABDC")) + tm.assert_frame_equal(result, expected) + result = df.assign(C=df.A - df.B, D=df.A + df.B) + + expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], columns=list("ABCD")) + + tm.assert_frame_equal(result, expected) + + def test_assign_bad(self): + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + + # non-keyword argument + with pytest.raises(TypeError): + df.assign(lambda x: x.A) + with pytest.raises(AttributeError): + df.assign(C=df.A, D=df.A + df.C) + + def test_assign_dependent(self): + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + + result = df.assign(C=df.A, D=lambda x: x["A"] + x["C"]) + expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD")) + tm.assert_frame_equal(result, expected) + + result = df.assign(C=lambda df: df.A, D=lambda df: df["A"] + df["C"]) + expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD")) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py new file mode 100644 index 0000000000000..3b8fa0dfbb603 --- /dev/null +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -0,0 +1,286 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import DataFrame, Series, date_range +import pandas._testing as tm + + +class TestDataFrameInterpolate: + def test_interp_basic(self): + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": [1, 4, 9, np.nan], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + expected = DataFrame( + { + "A": [1.0, 2.0, 3.0, 4.0], + "B": [1.0, 4.0, 9.0, 9.0], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + result = df.interpolate() + tm.assert_frame_equal(result, expected) + + result = df.set_index("C").interpolate() + expected = df.set_index("C") + expected.loc[3, "A"] = 3 + expected.loc[5, "B"] = 9 + tm.assert_frame_equal(result, expected) + + def test_interp_bad_method(self): + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": [1, 4, 9, np.nan], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + with pytest.raises(ValueError): + df.interpolate(method="not_a_method") + + def test_interp_combo(self): + df = DataFrame( + { + "A": [1.0, 2.0, np.nan, 4.0], + "B": [1, 4, 9, np.nan], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + + result = df["A"].interpolate() + expected = Series([1.0, 2.0, 3.0, 4.0], name="A") + tm.assert_series_equal(result, expected) + + result = df["A"].interpolate(downcast="infer") + expected = Series([1, 2, 3, 4], name="A") + tm.assert_series_equal(result, expected) + + def test_interp_nan_idx(self): + df = DataFrame({"A": [1, 2, np.nan, 4], "B": [np.nan, 2, 3, 4]}) + df = df.set_index("A") + with pytest.raises(NotImplementedError): + df.interpolate(method="values") + + @td.skip_if_no_scipy + def test_interp_various(self): + df = DataFrame( + {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]} + ) + df = df.set_index("C") + expected = df.copy() + result = df.interpolate(method="polynomial", order=1) + + expected.A.loc[3] = 2.66666667 + expected.A.loc[13] = 5.76923076 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="cubic") + # GH #15662. 
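+        # (labels 3 and 13 are the values of the "C" index at the two
+        # NaN positions in column "A")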
+ expected.A.loc[3] = 2.81547781 + expected.A.loc[13] = 5.52964175 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="nearest") + expected.A.loc[3] = 2 + expected.A.loc[13] = 5 + tm.assert_frame_equal(result, expected, check_dtype=False) + + result = df.interpolate(method="quadratic") + expected.A.loc[3] = 2.82150771 + expected.A.loc[13] = 6.12648668 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="slinear") + expected.A.loc[3] = 2.66666667 + expected.A.loc[13] = 5.76923077 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="zero") + expected.A.loc[3] = 2.0 + expected.A.loc[13] = 5 + tm.assert_frame_equal(result, expected, check_dtype=False) + + @td.skip_if_no_scipy + def test_interp_alt_scipy(self): + df = DataFrame( + {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]} + ) + result = df.interpolate(method="barycentric") + expected = df.copy() + expected.loc[2, "A"] = 3 + expected.loc[5, "A"] = 6 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="barycentric", downcast="infer") + tm.assert_frame_equal(result, expected.astype(np.int64)) + + result = df.interpolate(method="krogh") + expectedk = df.copy() + expectedk["A"] = expected["A"] + tm.assert_frame_equal(result, expectedk) + + result = df.interpolate(method="pchip") + expected.loc[2, "A"] = 3 + expected.loc[5, "A"] = 6.0 + + tm.assert_frame_equal(result, expected) + + def test_interp_rowwise(self): + df = DataFrame( + { + 0: [1, 2, np.nan, 4], + 1: [2, 3, 4, np.nan], + 2: [np.nan, 4, 5, 6], + 3: [4, np.nan, 6, 7], + 4: [1, 2, 3, 4], + } + ) + result = df.interpolate(axis=1) + expected = df.copy() + expected.loc[3, 1] = 5 + expected.loc[0, 2] = 3 + expected.loc[1, 3] = 3 + expected[4] = expected[4].astype(np.float64) + tm.assert_frame_equal(result, expected) + + result = df.interpolate(axis=1, method="values") + tm.assert_frame_equal(result, expected) + + result = df.interpolate(axis=0) + expected = df.interpolate() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "axis_name, axis_number", + [ + pytest.param("rows", 0, id="rows_0"), + pytest.param("index", 0, id="index_0"), + pytest.param("columns", 1, id="columns_1"), + ], + ) + def test_interp_axis_names(self, axis_name, axis_number): + # GH 29132: test axis names + data = {0: [0, np.nan, 6], 1: [1, np.nan, 7], 2: [2, 5, 8]} + + df = DataFrame(data, dtype=np.float64) + result = df.interpolate(axis=axis_name, method="linear") + expected = df.interpolate(axis=axis_number, method="linear") + tm.assert_frame_equal(result, expected) + + def test_rowwise_alt(self): + df = DataFrame( + { + 0: [0, 0.5, 1.0, np.nan, 4, 8, np.nan, np.nan, 64], + 1: [1, 2, 3, 4, 3, 2, 1, 0, -1], + } + ) + df.interpolate(axis=0) + # TODO: assert something? 
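+        # One check that would hold here (a suggestion only, not asserted
+        # by the original test): linear interpolation along axis=0 fills
+        # every interior NaN in this frame, i.e.
+        #     assert not df.interpolate(axis=0).isna().any().any()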
+ + @pytest.mark.parametrize( + "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] + ) + def test_interp_leading_nans(self, check_scipy): + df = DataFrame( + {"A": [np.nan, np.nan, 0.5, 0.25, 0], "B": [np.nan, -3, -3.5, np.nan, -4]} + ) + result = df.interpolate() + expected = df.copy() + expected["B"].loc[3] = -3.75 + tm.assert_frame_equal(result, expected) + + if check_scipy: + result = df.interpolate(method="polynomial", order=1) + tm.assert_frame_equal(result, expected) + + def test_interp_raise_on_only_mixed(self): + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": ["a", "b", "c", "d"], + "C": [np.nan, 2, 5, 7], + "D": [np.nan, np.nan, 9, 9], + "E": [1, 2, 3, 4], + } + ) + with pytest.raises(TypeError): + df.interpolate(axis=1) + + def test_interp_raise_on_all_object_dtype(self): + # GH 22985 + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, dtype="object") + msg = ( + "Cannot interpolate with all object-dtype columns " + "in the DataFrame. Try setting at least one " + "column to a numeric dtype." + ) + with pytest.raises(TypeError, match=msg): + df.interpolate() + + def test_interp_inplace(self): + df = DataFrame({"a": [1.0, 2.0, np.nan, 4.0]}) + expected = DataFrame({"a": [1.0, 2.0, 3.0, 4.0]}) + result = df.copy() + result["a"].interpolate(inplace=True) + tm.assert_frame_equal(result, expected) + + result = df.copy() + result["a"].interpolate(inplace=True, downcast="infer") + tm.assert_frame_equal(result, expected.astype("int64")) + + def test_interp_inplace_row(self): + # GH 10395 + result = DataFrame( + {"a": [1.0, 2.0, 3.0, 4.0], "b": [np.nan, 2.0, 3.0, 4.0], "c": [3, 2, 2, 2]} + ) + expected = result.interpolate(method="linear", axis=1, inplace=False) + result.interpolate(method="linear", axis=1, inplace=True) + tm.assert_frame_equal(result, expected) + + def test_interp_ignore_all_good(self): + # GH + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": [1, 2, 3, 4], + "C": [1.0, 2.0, np.nan, 4.0], + "D": [1.0, 2.0, 3.0, 4.0], + } + ) + expected = DataFrame( + { + "A": np.array([1, 2, 3, 4], dtype="float64"), + "B": np.array([1, 2, 3, 4], dtype="int64"), + "C": np.array([1.0, 2.0, 3, 4.0], dtype="float64"), + "D": np.array([1.0, 2.0, 3.0, 4.0], dtype="float64"), + } + ) + + result = df.interpolate(downcast=None) + tm.assert_frame_equal(result, expected) + + # all good + result = df[["B", "D"]].interpolate(downcast=None) + tm.assert_frame_equal(result, df[["B", "D"]]) + + @pytest.mark.parametrize("axis", [0, 1]) + def test_interp_time_inplace_axis(self, axis): + # GH 9687 + periods = 5 + idx = date_range(start="2014-01-01", periods=periods) + data = np.random.rand(periods, periods) + data[data < 0.5] = np.nan + expected = DataFrame(index=idx, columns=idx, data=data) + + result = expected.interpolate(axis=0, method="time") + expected.interpolate(axis=0, method="time", inplace=True) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index ae0516dd29a1f..196df8ba00476 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -4,8 +4,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd from pandas import Categorical, DataFrame, Series, Timestamp, date_range import pandas._testing as tm @@ -705,281 +703,3 @@ def test_fill_value_when_combine_const(self): exp = df.fillna(0).add(2) res = df.add(2, fill_value=0) tm.assert_frame_equal(res, exp) - - -class TestDataFrameInterpolate: - def 
test_interp_basic(self): - df = DataFrame( - { - "A": [1, 2, np.nan, 4], - "B": [1, 4, 9, np.nan], - "C": [1, 2, 3, 5], - "D": list("abcd"), - } - ) - expected = DataFrame( - { - "A": [1.0, 2.0, 3.0, 4.0], - "B": [1.0, 4.0, 9.0, 9.0], - "C": [1, 2, 3, 5], - "D": list("abcd"), - } - ) - result = df.interpolate() - tm.assert_frame_equal(result, expected) - - result = df.set_index("C").interpolate() - expected = df.set_index("C") - expected.loc[3, "A"] = 3 - expected.loc[5, "B"] = 9 - tm.assert_frame_equal(result, expected) - - def test_interp_bad_method(self): - df = DataFrame( - { - "A": [1, 2, np.nan, 4], - "B": [1, 4, 9, np.nan], - "C": [1, 2, 3, 5], - "D": list("abcd"), - } - ) - with pytest.raises(ValueError): - df.interpolate(method="not_a_method") - - def test_interp_combo(self): - df = DataFrame( - { - "A": [1.0, 2.0, np.nan, 4.0], - "B": [1, 4, 9, np.nan], - "C": [1, 2, 3, 5], - "D": list("abcd"), - } - ) - - result = df["A"].interpolate() - expected = Series([1.0, 2.0, 3.0, 4.0], name="A") - tm.assert_series_equal(result, expected) - - result = df["A"].interpolate(downcast="infer") - expected = Series([1, 2, 3, 4], name="A") - tm.assert_series_equal(result, expected) - - def test_interp_nan_idx(self): - df = DataFrame({"A": [1, 2, np.nan, 4], "B": [np.nan, 2, 3, 4]}) - df = df.set_index("A") - with pytest.raises(NotImplementedError): - df.interpolate(method="values") - - @td.skip_if_no_scipy - def test_interp_various(self): - df = DataFrame( - {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]} - ) - df = df.set_index("C") - expected = df.copy() - result = df.interpolate(method="polynomial", order=1) - - expected.A.loc[3] = 2.66666667 - expected.A.loc[13] = 5.76923076 - tm.assert_frame_equal(result, expected) - - result = df.interpolate(method="cubic") - # GH #15662. 
- expected.A.loc[3] = 2.81547781 - expected.A.loc[13] = 5.52964175 - tm.assert_frame_equal(result, expected) - - result = df.interpolate(method="nearest") - expected.A.loc[3] = 2 - expected.A.loc[13] = 5 - tm.assert_frame_equal(result, expected, check_dtype=False) - - result = df.interpolate(method="quadratic") - expected.A.loc[3] = 2.82150771 - expected.A.loc[13] = 6.12648668 - tm.assert_frame_equal(result, expected) - - result = df.interpolate(method="slinear") - expected.A.loc[3] = 2.66666667 - expected.A.loc[13] = 5.76923077 - tm.assert_frame_equal(result, expected) - - result = df.interpolate(method="zero") - expected.A.loc[3] = 2.0 - expected.A.loc[13] = 5 - tm.assert_frame_equal(result, expected, check_dtype=False) - - @td.skip_if_no_scipy - def test_interp_alt_scipy(self): - df = DataFrame( - {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]} - ) - result = df.interpolate(method="barycentric") - expected = df.copy() - expected.loc[2, "A"] = 3 - expected.loc[5, "A"] = 6 - tm.assert_frame_equal(result, expected) - - result = df.interpolate(method="barycentric", downcast="infer") - tm.assert_frame_equal(result, expected.astype(np.int64)) - - result = df.interpolate(method="krogh") - expectedk = df.copy() - expectedk["A"] = expected["A"] - tm.assert_frame_equal(result, expectedk) - - result = df.interpolate(method="pchip") - expected.loc[2, "A"] = 3 - expected.loc[5, "A"] = 6.0 - - tm.assert_frame_equal(result, expected) - - def test_interp_rowwise(self): - df = DataFrame( - { - 0: [1, 2, np.nan, 4], - 1: [2, 3, 4, np.nan], - 2: [np.nan, 4, 5, 6], - 3: [4, np.nan, 6, 7], - 4: [1, 2, 3, 4], - } - ) - result = df.interpolate(axis=1) - expected = df.copy() - expected.loc[3, 1] = 5 - expected.loc[0, 2] = 3 - expected.loc[1, 3] = 3 - expected[4] = expected[4].astype(np.float64) - tm.assert_frame_equal(result, expected) - - result = df.interpolate(axis=1, method="values") - tm.assert_frame_equal(result, expected) - - result = df.interpolate(axis=0) - expected = df.interpolate() - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "axis_name, axis_number", - [ - pytest.param("rows", 0, id="rows_0"), - pytest.param("index", 0, id="index_0"), - pytest.param("columns", 1, id="columns_1"), - ], - ) - def test_interp_axis_names(self, axis_name, axis_number): - # GH 29132: test axis names - data = {0: [0, np.nan, 6], 1: [1, np.nan, 7], 2: [2, 5, 8]} - - df = DataFrame(data, dtype=np.float64) - result = df.interpolate(axis=axis_name, method="linear") - expected = df.interpolate(axis=axis_number, method="linear") - tm.assert_frame_equal(result, expected) - - def test_rowwise_alt(self): - df = DataFrame( - { - 0: [0, 0.5, 1.0, np.nan, 4, 8, np.nan, np.nan, 64], - 1: [1, 2, 3, 4, 3, 2, 1, 0, -1], - } - ) - df.interpolate(axis=0) - - @pytest.mark.parametrize( - "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] - ) - def test_interp_leading_nans(self, check_scipy): - df = DataFrame( - {"A": [np.nan, np.nan, 0.5, 0.25, 0], "B": [np.nan, -3, -3.5, np.nan, -4]} - ) - result = df.interpolate() - expected = df.copy() - expected["B"].loc[3] = -3.75 - tm.assert_frame_equal(result, expected) - - if check_scipy: - result = df.interpolate(method="polynomial", order=1) - tm.assert_frame_equal(result, expected) - - def test_interp_raise_on_only_mixed(self): - df = DataFrame( - { - "A": [1, 2, np.nan, 4], - "B": ["a", "b", "c", "d"], - "C": [np.nan, 2, 5, 7], - "D": [np.nan, np.nan, 9, 9], - "E": [1, 2, 3, 4], - } - ) - with pytest.raises(TypeError): - 
df.interpolate(axis=1) - - def test_interp_raise_on_all_object_dtype(self): - # GH 22985 - df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, dtype="object") - msg = ( - "Cannot interpolate with all object-dtype columns " - "in the DataFrame. Try setting at least one " - "column to a numeric dtype." - ) - with pytest.raises(TypeError, match=msg): - df.interpolate() - - def test_interp_inplace(self): - df = DataFrame({"a": [1.0, 2.0, np.nan, 4.0]}) - expected = DataFrame({"a": [1.0, 2.0, 3.0, 4.0]}) - result = df.copy() - result["a"].interpolate(inplace=True) - tm.assert_frame_equal(result, expected) - - result = df.copy() - result["a"].interpolate(inplace=True, downcast="infer") - tm.assert_frame_equal(result, expected.astype("int64")) - - def test_interp_inplace_row(self): - # GH 10395 - result = DataFrame( - {"a": [1.0, 2.0, 3.0, 4.0], "b": [np.nan, 2.0, 3.0, 4.0], "c": [3, 2, 2, 2]} - ) - expected = result.interpolate(method="linear", axis=1, inplace=False) - result.interpolate(method="linear", axis=1, inplace=True) - tm.assert_frame_equal(result, expected) - - def test_interp_ignore_all_good(self): - # GH - df = DataFrame( - { - "A": [1, 2, np.nan, 4], - "B": [1, 2, 3, 4], - "C": [1.0, 2.0, np.nan, 4.0], - "D": [1.0, 2.0, 3.0, 4.0], - } - ) - expected = DataFrame( - { - "A": np.array([1, 2, 3, 4], dtype="float64"), - "B": np.array([1, 2, 3, 4], dtype="int64"), - "C": np.array([1.0, 2.0, 3, 4.0], dtype="float64"), - "D": np.array([1.0, 2.0, 3.0, 4.0], dtype="float64"), - } - ) - - result = df.interpolate(downcast=None) - tm.assert_frame_equal(result, expected) - - # all good - result = df[["B", "D"]].interpolate(downcast=None) - tm.assert_frame_equal(result, df[["B", "D"]]) - - @pytest.mark.parametrize("axis", [0, 1]) - def test_interp_time_inplace_axis(self, axis): - # GH 9687 - periods = 5 - idx = pd.date_range(start="2014-01-01", periods=periods) - data = np.random.rand(periods, periods) - data[data < 0.5] = np.nan - expected = pd.DataFrame(index=idx, columns=idx, data=data) - - result = expected.interpolate(axis=0, method="time") - expected.interpolate(axis=0, method="time", inplace=True) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 8bc2aa214e035..33f71602f4713 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -10,82 +10,6 @@ class TestDataFrameMutateColumns: - def test_assign(self): - df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - original = df.copy() - result = df.assign(C=df.B / df.A) - expected = df.copy() - expected["C"] = [4, 2.5, 2] - tm.assert_frame_equal(result, expected) - - # lambda syntax - result = df.assign(C=lambda x: x.B / x.A) - tm.assert_frame_equal(result, expected) - - # original is unmodified - tm.assert_frame_equal(df, original) - - # Non-Series array-like - result = df.assign(C=[4, 2.5, 2]) - tm.assert_frame_equal(result, expected) - # original is unmodified - tm.assert_frame_equal(df, original) - - result = df.assign(B=df.B / df.A) - expected = expected.drop("B", axis=1).rename(columns={"C": "B"}) - tm.assert_frame_equal(result, expected) - - # overwrite - result = df.assign(A=df.A + df.B) - expected = df.copy() - expected["A"] = [5, 7, 9] - tm.assert_frame_equal(result, expected) - - # lambda - result = df.assign(A=lambda x: x.A + x.B) - tm.assert_frame_equal(result, expected) - - def test_assign_multiple(self): - df = DataFrame([[1, 4], [2, 5], [3, 6]], columns=["A", "B"]) - result = df.assign(C=[7, 
8, 9], D=df.A, E=lambda x: x.B) - expected = DataFrame( - [[1, 4, 7, 1, 4], [2, 5, 8, 2, 5], [3, 6, 9, 3, 6]], columns=list("ABCDE") - ) - tm.assert_frame_equal(result, expected) - - def test_assign_order(self): - # GH 9818 - df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) - result = df.assign(D=df.A + df.B, C=df.A - df.B) - - expected = DataFrame([[1, 2, 3, -1], [3, 4, 7, -1]], columns=list("ABDC")) - tm.assert_frame_equal(result, expected) - result = df.assign(C=df.A - df.B, D=df.A + df.B) - - expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], columns=list("ABCD")) - - tm.assert_frame_equal(result, expected) - - def test_assign_bad(self): - df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - - # non-keyword argument - with pytest.raises(TypeError): - df.assign(lambda x: x.A) - with pytest.raises(AttributeError): - df.assign(C=df.A, D=df.A + df.C) - - def test_assign_dependent(self): - df = DataFrame({"A": [1, 2], "B": [3, 4]}) - - result = df.assign(C=df.A, D=lambda x: x["A"] + x["C"]) - expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD")) - tm.assert_frame_equal(result, expected) - - result = df.assign(C=lambda df: df.A, D=lambda df: df["A"] + df["C"]) - expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD")) - tm.assert_frame_equal(result, expected) - def test_insert_error_msmgs(self): # GH 7432 From 10d10c69590fd0b5bad246abfd4b9802bba42612 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 08:04:38 -0800 Subject: [PATCH 102/388] move method from LocationIndexer to Index (#31857) --- pandas/core/indexes/base.py | 10 ++++++++++ pandas/core/indexes/multi.py | 29 +++++++++++++++++++++++++++++ pandas/core/indexing.py | 34 +--------------------------------- 3 files changed, 40 insertions(+), 33 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 14ee21ea5614c..f9f343dd9056a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3091,6 +3091,16 @@ def _filter_indexer_tolerance( # -------------------------------------------------------------------- # Indexer Conversion Methods + def _get_partial_string_timestamp_match_key(self, key): + """ + Translate any partial string timestamp matches in key, returning the + new key. + + Only relevant for MultiIndex. + """ + # GH#10331 + return key + def _convert_scalar_indexer(self, key, kind: str_t): """ Convert a scalar indexer. diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0a79df0cc9744..4bd462e83a5bc 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2323,6 +2323,35 @@ def _convert_listlike_indexer(self, keyarr): return indexer, keyarr + def _get_partial_string_timestamp_match_key(self, key): + """ + Translate any partial string timestamp matches in key, returning the + new key. + + Only relevant for MultiIndex. + """ + # GH#10331 + if isinstance(key, str) and self.levels[0]._supports_partial_string_indexing: + # Convert key '2016-01-01' to + # ('2016-01-01'[, slice(None, None, None)]+) + key = tuple([key] + [slice(None)] * (len(self.levels) - 1)) + + if isinstance(key, tuple): + # Convert (..., '2016-01-01', ...) in tuple to + # (..., slice('2016-01-01', '2016-01-01', None), ...) 
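+            # e.g. with levels (DatetimeIndex, Index), the key
+            # ("2016-01-01", "a") becomes
+            # (slice("2016-01-01", "2016-01-01", None), "a")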
+ new_key = [] + for i, component in enumerate(key): + if ( + isinstance(component, str) + and self.levels[i]._supports_partial_string_indexing + ): + new_key.append(slice(component, component, None)) + else: + new_key.append(component) + key = tuple(new_key) + + return key + @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs) def get_indexer(self, target, method=None, limit=None, tolerance=None): method = missing.clean_reindex_fill_method(method) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 5ae237eb7dc32..745456109148e 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -966,38 +966,6 @@ def _multi_take(self, tup: Tuple): # ------------------------------------------------------------------- - def _get_partial_string_timestamp_match_key(self, key, labels): - """ - Translate any partial string timestamp matches in key, returning the - new key. - - (GH 10331) - """ - if isinstance(labels, ABCMultiIndex): - if ( - isinstance(key, str) - and labels.levels[0]._supports_partial_string_indexing - ): - # Convert key '2016-01-01' to - # ('2016-01-01'[, slice(None, None, None)]+) - key = tuple([key] + [slice(None)] * (len(labels.levels) - 1)) - - if isinstance(key, tuple): - # Convert (..., '2016-01-01', ...) in tuple to - # (..., slice('2016-01-01', '2016-01-01', None), ...) - new_key = [] - for i, component in enumerate(key): - if ( - isinstance(component, str) - and labels.levels[i]._supports_partial_string_indexing - ): - new_key.append(slice(component, component, None)) - else: - new_key.append(component) - key = tuple(new_key) - - return key - def _getitem_iterable(self, key, axis: int): """ Index current object with an an iterable collection of keys. @@ -1079,7 +1047,7 @@ def _getitem_axis(self, key, axis: int): key = list(key) labels = self.obj._get_axis(axis) - key = self._get_partial_string_timestamp_match_key(key, labels) + key = labels._get_partial_string_timestamp_match_key(key) if isinstance(key, slice): self._validate_key(key, axis) From 6ab00bcb4d7fa66482fca8a34fc05c85d9101c87 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 08:06:40 -0800 Subject: [PATCH 103/388] REF: collect arithmetic benchmarks (#32116) --- .../{binary_ops.py => arithmetic.py} | 179 +++++++++++++++++- asv_bench/benchmarks/categoricals.py | 12 -- asv_bench/benchmarks/index_object.py | 26 --- asv_bench/benchmarks/inference.py | 49 +---- asv_bench/benchmarks/offset.py | 80 -------- asv_bench/benchmarks/timedelta.py | 11 +- asv_bench/benchmarks/timeseries.py | 12 -- 7 files changed, 181 insertions(+), 188 deletions(-) rename asv_bench/benchmarks/{binary_ops.py => arithmetic.py} (51%) delete mode 100644 asv_bench/benchmarks/offset.py diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/arithmetic.py similarity index 51% rename from asv_bench/benchmarks/binary_ops.py rename to asv_bench/benchmarks/arithmetic.py index 64e067d25a454..d1e94f62967f4 100644 --- a/asv_bench/benchmarks/binary_ops.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -1,14 +1,23 @@ import operator +import warnings import numpy as np -from pandas import DataFrame, Series, date_range +import pandas as pd +from pandas import DataFrame, Series, Timestamp, date_range, to_timedelta +import pandas._testing as tm from pandas.core.algorithms import checked_add_with_arr +from .pandas_vb_common import numeric_dtypes + try: import pandas.core.computation.expressions as expr except ImportError: import pandas.computation.expressions as expr +try: + import 
pandas.tseries.holiday
+except ImportError:
+    pass
 
 
 class IntFrameWithScalar:
@@ -151,6 +160,110 @@ def time_timestamp_ops_diff_with_shift(self, tz):
         self.s - self.s.shift()
 
 
+class IrregularOps:
+    def setup(self):
+        N = 10 ** 5
+        idx = date_range(start="1/1/2000", periods=N, freq="s")
+        s = Series(np.random.randn(N), index=idx)
+        self.left = s.sample(frac=1)
+        self.right = s.sample(frac=1)
+
+    def time_add(self):
+        self.left + self.right
+
+
+class TimedeltaOps:
+    def setup(self):
+        self.td = to_timedelta(np.arange(1000000))
+        self.ts = Timestamp("2000")
+
+    def time_add_td_ts(self):
+        self.td + self.ts
+
+
+class CategoricalComparisons:
+    params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"]
+    param_names = ["op"]
+
+    def setup(self, op):
+        N = 10 ** 5
+        self.cat = pd.Categorical(list("aabbcd") * N, ordered=True)
+
+    def time_categorical_op(self, op):
+        getattr(self.cat, op)("b")
+
+
+class IndexArithmetic:
+
+    params = ["float", "int"]
+    param_names = ["dtype"]
+
+    def setup(self, dtype):
+        N = 10 ** 6
+        indexes = {"int": "makeIntIndex", "float": "makeFloatIndex"}
+        self.index = getattr(tm, indexes[dtype])(N)
+
+    def time_add(self, dtype):
+        self.index + 2
+
+    def time_subtract(self, dtype):
+        self.index - 2
+
+    def time_multiply(self, dtype):
+        self.index * 2
+
+    def time_divide(self, dtype):
+        self.index / 2
+
+    def time_modulo(self, dtype):
+        self.index % 2
+
+
+class NumericInferOps:
+    # from GH 7332
+    params = numeric_dtypes
+    param_names = ["dtype"]
+
+    def setup(self, dtype):
+        N = 5 * 10 ** 5
+        self.df = DataFrame(
+            {"A": np.arange(N).astype(dtype), "B": np.arange(N).astype(dtype)}
+        )
+
+    def time_add(self, dtype):
+        self.df["A"] + self.df["B"]
+
+    def time_subtract(self, dtype):
+        self.df["A"] - self.df["B"]
+
+    def time_multiply(self, dtype):
+        self.df["A"] * self.df["B"]
+
+    def time_divide(self, dtype):
+        self.df["A"] / self.df["B"]
+
+    def time_modulo(self, dtype):
+        self.df["A"] % self.df["B"]
+
+
+class DateInferOps:
+    # from GH 7332
+    def setup_cache(self):
+        N = 5 * 10 ** 5
+        df = DataFrame({"datetime64": np.arange(N).astype("datetime64[ms]")})
+        df["timedelta"] = df["datetime64"] - df["datetime64"]
+        return df
+
+    def time_subtract_datetimes(self, df):
+        df["datetime64"] - df["datetime64"]
+
+    def time_timedelta_plus_datetime(self, df):
+        df["timedelta"] + df["datetime64"]
+
+    def time_add_timedeltas(self, df):
+        df["timedelta"] + df["timedelta"]
+
+
 class AddOverflowScalar:
 
     params = [1, -1, 0]
@@ -188,4 +301,68 @@ def time_add_overflow_both_arg_nan(self):
     )
 
 
+hcal = pd.tseries.holiday.USFederalHolidayCalendar()
+# These offsets currently raise a NotImplementedError with .apply_index()
+non_apply = [
+    pd.offsets.Day(),
+    pd.offsets.BYearEnd(),
+    pd.offsets.BYearBegin(),
+    pd.offsets.BQuarterEnd(),
+    pd.offsets.BQuarterBegin(),
+    pd.offsets.BMonthEnd(),
+    pd.offsets.BMonthBegin(),
+    pd.offsets.CustomBusinessDay(),
+    pd.offsets.CustomBusinessDay(calendar=hcal),
+    pd.offsets.CustomBusinessMonthBegin(calendar=hcal),
+    pd.offsets.CustomBusinessMonthEnd(calendar=hcal),
+    pd.offsets.CustomBusinessMonthEnd(calendar=hcal),
+]
+other_offsets = [
+    pd.offsets.YearEnd(),
+    pd.offsets.YearBegin(),
+    pd.offsets.QuarterEnd(),
+    pd.offsets.QuarterBegin(),
+    pd.offsets.MonthEnd(),
+    pd.offsets.MonthBegin(),
+    pd.offsets.DateOffset(months=2, days=2),
+    pd.offsets.BusinessDay(),
+    pd.offsets.SemiMonthEnd(),
+    pd.offsets.SemiMonthBegin(),
+]
+offsets = non_apply + other_offsets
+
+
+class OffsetArrayArithmetic:
+
+    params = offsets
+    param_names = ["offset"]
+
+    def 
setup(self, offset): + N = 10000 + rng = pd.date_range(start="1/1/2000", periods=N, freq="T") + self.rng = rng + self.ser = pd.Series(rng) + + def time_add_series_offset(self, offset): + with warnings.catch_warnings(record=True): + self.ser + offset + + def time_add_dti_offset(self, offset): + with warnings.catch_warnings(record=True): + self.rng + offset + + +class ApplyIndex: + params = other_offsets + param_names = ["offset"] + + def setup(self, offset): + N = 10000 + rng = pd.date_range(start="1/1/2000", periods=N, freq="T") + self.rng = rng + + def time_apply_index(self, offset): + offset.apply_index(self.rng) + + from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 1dcd52ac074a6..6f43a6fd3fc9b 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -63,18 +63,6 @@ def time_existing_series(self): pd.Categorical(self.series) -class CategoricalOps: - params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"] - param_names = ["op"] - - def setup(self, op): - N = 10 ** 5 - self.cat = pd.Categorical(list("aabbcd") * N, ordered=True) - - def time_categorical_op(self, op): - getattr(self.cat, op)("b") - - class Concat: def setup(self): N = 10 ** 5 diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index 103141545504b..cf51a4d35f805 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -63,32 +63,6 @@ def time_is_dates_only(self): self.dr._is_dates_only -class Ops: - - params = ["float", "int"] - param_names = ["dtype"] - - def setup(self, dtype): - N = 10 ** 6 - indexes = {"int": "makeIntIndex", "float": "makeFloatIndex"} - self.index = getattr(tm, indexes[dtype])(N) - - def time_add(self, dtype): - self.index + 2 - - def time_subtract(self, dtype): - self.index - 2 - - def time_multiply(self, dtype): - self.index * 2 - - def time_divide(self, dtype): - self.index / 2 - - def time_modulo(self, dtype): - self.index % 2 - - class Range: def setup(self): self.idx_inc = RangeIndex(start=0, stop=10 ** 7, step=3) diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py index 1a8d5ede52512..40b064229ae49 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -1,53 +1,8 @@ import numpy as np -from pandas import DataFrame, Series, to_numeric +from pandas import Series, to_numeric -from .pandas_vb_common import lib, numeric_dtypes, tm - - -class NumericInferOps: - # from GH 7332 - params = numeric_dtypes - param_names = ["dtype"] - - def setup(self, dtype): - N = 5 * 10 ** 5 - self.df = DataFrame( - {"A": np.arange(N).astype(dtype), "B": np.arange(N).astype(dtype)} - ) - - def time_add(self, dtype): - self.df["A"] + self.df["B"] - - def time_subtract(self, dtype): - self.df["A"] - self.df["B"] - - def time_multiply(self, dtype): - self.df["A"] * self.df["B"] - - def time_divide(self, dtype): - self.df["A"] / self.df["B"] - - def time_modulo(self, dtype): - self.df["A"] % self.df["B"] - - -class DateInferOps: - # from GH 7332 - def setup_cache(self): - N = 5 * 10 ** 5 - df = DataFrame({"datetime64": np.arange(N).astype("datetime64[ms]")}) - df["timedelta"] = df["datetime64"] - df["datetime64"] - return df - - def time_subtract_datetimes(self, df): - df["datetime64"] - df["datetime64"] - - def time_timedelta_plus_datetime(self, df): - df["timedelta"] + df["datetime64"] - - def time_add_timedeltas(self, df): - 
df["timedelta"] + df["timedelta"] +from .pandas_vb_common import lib, tm class ToNumeric: diff --git a/asv_bench/benchmarks/offset.py b/asv_bench/benchmarks/offset.py deleted file mode 100644 index 77ce1b2763bce..0000000000000 --- a/asv_bench/benchmarks/offset.py +++ /dev/null @@ -1,80 +0,0 @@ -import warnings - -import pandas as pd - -try: - import pandas.tseries.holiday -except ImportError: - pass - -hcal = pd.tseries.holiday.USFederalHolidayCalendar() -# These offsets currently raise a NotImplimentedError with .apply_index() -non_apply = [ - pd.offsets.Day(), - pd.offsets.BYearEnd(), - pd.offsets.BYearBegin(), - pd.offsets.BQuarterEnd(), - pd.offsets.BQuarterBegin(), - pd.offsets.BMonthEnd(), - pd.offsets.BMonthBegin(), - pd.offsets.CustomBusinessDay(), - pd.offsets.CustomBusinessDay(calendar=hcal), - pd.offsets.CustomBusinessMonthBegin(calendar=hcal), - pd.offsets.CustomBusinessMonthEnd(calendar=hcal), - pd.offsets.CustomBusinessMonthEnd(calendar=hcal), -] -other_offsets = [ - pd.offsets.YearEnd(), - pd.offsets.YearBegin(), - pd.offsets.QuarterEnd(), - pd.offsets.QuarterBegin(), - pd.offsets.MonthEnd(), - pd.offsets.MonthBegin(), - pd.offsets.DateOffset(months=2, days=2), - pd.offsets.BusinessDay(), - pd.offsets.SemiMonthEnd(), - pd.offsets.SemiMonthBegin(), -] -offsets = non_apply + other_offsets - - -class ApplyIndex: - - params = other_offsets - param_names = ["offset"] - - def setup(self, offset): - N = 10000 - self.rng = pd.date_range(start="1/1/2000", periods=N, freq="T") - - def time_apply_index(self, offset): - offset.apply_index(self.rng) - - -class OffsetSeriesArithmetic: - - params = offsets - param_names = ["offset"] - - def setup(self, offset): - N = 1000 - rng = pd.date_range(start="1/1/2000", periods=N, freq="T") - self.data = pd.Series(rng) - - def time_add_offset(self, offset): - with warnings.catch_warnings(record=True): - self.data + offset - - -class OffsetDatetimeIndexArithmetic: - - params = offsets - param_names = ["offset"] - - def setup(self, offset): - N = 1000 - self.data = pd.date_range(start="1/1/2000", periods=N, freq="T") - - def time_add_offset(self, offset): - with warnings.catch_warnings(record=True): - self.data + offset diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py index 37418d752f833..208c8f9d14a5e 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -5,7 +5,7 @@ import numpy as np -from pandas import DataFrame, Series, Timestamp, timedelta_range, to_timedelta +from pandas import DataFrame, Series, timedelta_range, to_timedelta class ToTimedelta: @@ -41,15 +41,6 @@ def time_convert(self, errors): to_timedelta(self.arr, errors=errors) -class TimedeltaOps: - def setup(self): - self.td = to_timedelta(np.arange(1000000)) - self.ts = Timestamp("2000") - - def time_add_td_ts(self): - self.td + self.ts - - class DatetimeAccessor: def setup_cache(self): N = 100000 diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index ba0b51922fd31..2f7ea8b9c0873 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -262,18 +262,6 @@ def time_get_slice(self, monotonic): self.s[:10000] -class IrregularOps: - def setup(self): - N = 10 ** 5 - idx = date_range(start="1/1/2000", periods=N, freq="s") - s = Series(np.random.randn(N), index=idx) - self.left = s.sample(frac=1) - self.right = s.sample(frac=1) - - def time_add(self): - self.left + self.right - - class Lookup: def setup(self): N = 1500000 From 
628dfba239865adc09c94108b288bcb60c619950 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 08:08:23 -0800 Subject: [PATCH 104/388] BUG: Fix incorrect _is_scalar_access check in iloc (#32085) --- pandas/core/generic.py | 4 ++-- pandas/core/indexing.py | 4 ---- pandas/tests/indexing/test_iloc.py | 11 +++++++++++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 579daae2b15c6..ac03843a0e27d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3467,13 +3467,13 @@ def _get_item_cache(self, item): res._is_copy = self._is_copy return res - def _iget_item_cache(self, item): + def _iget_item_cache(self, item: int): """Return the cached item, item represents a positional indexer.""" ax = self._info_axis if ax.is_unique: lower = self._get_item_cache(ax[item]) else: - lower = self._take_with_is_copy(item, axis=self._info_axis_number) + return self._ixs(item, axis=1) return lower def _box_item_values(self, key, values): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 745456109148e..c53e690b5f46c 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1414,10 +1414,6 @@ def _is_scalar_access(self, key: Tuple) -> bool: if not is_integer(k): return False - ax = self.obj.axes[i] - if not ax.is_unique: - return False - return True def _validate_integer(self, key: int, axis: int) -> None: diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 500bd1853e9a4..683d4f2605712 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -47,6 +47,17 @@ def test_iloc_getitem_list_int(self): class TestiLoc2: # TODO: better name, just separating out things that dont rely on base class + + def test_is_scalar_access(self): + # GH#32085 index with duplicates doesnt matter for _is_scalar_access + index = pd.Index([1, 2, 1]) + ser = pd.Series(range(3), index=index) + + assert ser.iloc._is_scalar_access((1,)) + + df = ser.to_frame() + assert df.iloc._is_scalar_access((1, 0,)) + def test_iloc_exceeds_bounds(self): # GH6296 From a66e149a073546a2c562c7bfcd032ca40df954a9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 08:09:14 -0800 Subject: [PATCH 105/388] PERF: IntegerIndex._shallow_copy (#32130) --- pandas/core/indexes/numeric.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 877b3d1d2ba30..367870f0ee467 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -103,11 +103,16 @@ def _maybe_cast_slice_bound(self, label, side, kind): return self._maybe_cast_indexer(label) @Appender(Index._shallow_copy.__doc__) - def _shallow_copy(self, values=None, **kwargs): - if values is not None and not self._can_hold_na: + def _shallow_copy(self, values=None, name=lib.no_default): + name = name if name is not lib.no_default else self.name + + if values is not None and not self._can_hold_na and values.dtype.kind == "f": # Ensure we are not returning an Int64Index with float data: - return self._shallow_copy_with_infer(values=values, **kwargs) - return super()._shallow_copy(values=values, **kwargs) + return Float64Index._simple_new(values, name=name) + + if values is None: + values = self.values + return type(self)._simple_new(values, name=name) def _convert_for_op(self, value): """ From 9aa9e4feaf75e3726324d49d7970ab63e285b75f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 
08:10:23 -0800 Subject: [PATCH 106/388] CLN: NDFrame.__init__ unnecessary code (#32131) --- pandas/core/frame.py | 2 +- pandas/core/generic.py | 40 ++++++++++---------------- pandas/core/series.py | 2 +- pandas/tests/indexing/test_indexing.py | 26 ----------------- 4 files changed, 17 insertions(+), 53 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b3da22d10eddb..7efb4fbb878d6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -514,7 +514,7 @@ def __init__( else: raise ValueError("DataFrame constructor not properly called!") - NDFrame.__init__(self, mgr, fastpath=True) + NDFrame.__init__(self, mgr) # ---------------------------------------------------------------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ac03843a0e27d..40e59f04192a6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -33,7 +33,6 @@ from pandas._libs import Timestamp, iNaT, lib from pandas._typing import ( Axis, - Dtype, FilePathOrBuffer, FrameOrSeries, JSONSerializable, @@ -200,22 +199,10 @@ class NDFrame(PandasObject, SelectionMixin, indexing.IndexingMixin): def __init__( self, data: BlockManager, - axes: Optional[List[Index]] = None, copy: bool = False, - dtype: Optional[Dtype] = None, attrs: Optional[Mapping[Optional[Hashable], Any]] = None, - fastpath: bool = False, ): - - if not fastpath: - if dtype is not None: - data = data.astype(dtype) - elif copy: - data = data.copy() - - if axes is not None: - for i, ax in enumerate(axes): - data = data.reindex_axis(ax, axis=i) + # copy kwarg is retained for mypy compat, is not used object.__setattr__(self, "_is_copy", None) object.__setattr__(self, "_data", data) @@ -226,12 +213,13 @@ def __init__( attrs = dict(attrs) object.__setattr__(self, "_attrs", attrs) - def _init_mgr(self, mgr, axes=None, dtype=None, copy=False): + @classmethod + def _init_mgr(cls, mgr, axes=None, dtype=None, copy=False): """ passed a manager and a axes dict """ for a, axe in axes.items(): if axe is not None: mgr = mgr.reindex_axis( - axe, axis=self._get_block_manager_axis(a), copy=False + axe, axis=cls._get_block_manager_axis(a), copy=False ) # make a copy if explicitly requested @@ -262,7 +250,8 @@ def attrs(self) -> Dict[Optional[Hashable], Any]: def attrs(self, value: Mapping[Optional[Hashable], Any]) -> None: self._attrs = dict(value) - def _validate_dtype(self, dtype): + @classmethod + def _validate_dtype(cls, dtype): """ validate the passed dtype """ if dtype is not None: dtype = pandas_dtype(dtype) @@ -271,7 +260,7 @@ def _validate_dtype(self, dtype): if dtype.kind == "V": raise NotImplementedError( "compound dtypes are not implemented " - f"in the {type(self).__name__} constructor" + f"in the {cls.__name__} constructor" ) return dtype @@ -324,8 +313,9 @@ def _construct_axes_dict(self, axes=None, **kwargs): d.update(kwargs) return d + @classmethod def _construct_axes_from_arguments( - self, args, kwargs, require_all: bool = False, sentinel=None + cls, args, kwargs, require_all: bool = False, sentinel=None ): """ Construct and returns axes if supplied in args/kwargs. 
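The conversions in this patch follow one rule: NDFrame helpers that never read instance state (`_init_mgr`, `_validate_dtype`, `_construct_axes_from_arguments`) become classmethods, so they are usable before any instance is fully constructed. A minimal standalone sketch of that rule; `Frame` is a stand-in class for illustration, not pandas code, and `np.dtype` stands in for the richer `pandas_dtype`:

import numpy as np


class Frame:
    @classmethod
    def _validate_dtype(cls, dtype):
        # Same shape as the patched check: reject compound (void) dtypes.
        if dtype is not None:
            dtype = np.dtype(dtype)
            if dtype.kind == "V":
                raise NotImplementedError(
                    f"compound dtypes are not implemented in the {cls.__name__} constructor"
                )
        return dtype


# No instance needed, e.g. callable from inside __init__ itself:
print(Frame._validate_dtype("int64"))  # int64
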
@@ -339,7 +329,7 @@ def _construct_axes_from_arguments( """ # construct the args args = list(args) - for a in self._AXIS_ORDERS: + for a in cls._AXIS_ORDERS: # look for a argument by position if a not in kwargs: @@ -349,7 +339,7 @@ def _construct_axes_from_arguments( if require_all: raise TypeError("not enough/duplicate arguments specified!") - axes = {a: kwargs.pop(a, sentinel) for a in self._AXIS_ORDERS} + axes = {a: kwargs.pop(a, sentinel) for a in cls._AXIS_ORDERS} return axes, kwargs @classmethod @@ -495,7 +485,7 @@ def ndim(self) -> int: return self._data.ndim @property - def size(self): + def size(self) -> int: """ Return an int representing the number of elements in this object. @@ -3660,7 +3650,7 @@ def get(self, key, default=None): return default @property - def _is_view(self): + def _is_view(self) -> bool_t: """Return boolean indicating if self is view of another array """ return self._data.is_view @@ -5176,12 +5166,12 @@ def _consolidate(self, inplace: bool_t = False): return self._constructor(cons_data).__finalize__(self) @property - def _is_mixed_type(self): + def _is_mixed_type(self) -> bool_t: f = lambda: self._data.is_mixed_type return self._protect_consolidate(f) @property - def _is_numeric_mixed_type(self): + def _is_numeric_mixed_type(self) -> bool_t: f = lambda: self._data.is_numeric_mixed_type return self._protect_consolidate(f) diff --git a/pandas/core/series.py b/pandas/core/series.py index 9c0ff9780da3e..2182374337c84 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -324,7 +324,7 @@ def __init__( data = SingleBlockManager(data, index, fastpath=True) - generic.NDFrame.__init__(self, data, fastpath=True) + generic.NDFrame.__init__(self, data) self.name = name self._set_axis(0, index, fastpath=True) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 1b3e301b0fef0..8af0fe548e48a 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -7,14 +7,11 @@ import numpy as np import pytest -from pandas.errors import AbstractMethodError - from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype import pandas as pd from pandas import DataFrame, Index, NaT, Series import pandas._testing as tm -from pandas.core.generic import NDFrame from pandas.core.indexers import validate_indices from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice from pandas.tests.indexing.common import _mklbl @@ -1094,29 +1091,6 @@ def test_extension_array_cross_section_converts(): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize( - "idxr, error, error_message", - [ - (lambda x: x, AbstractMethodError, None), - ( - lambda x: x.loc, - AttributeError, - "type object 'NDFrame' has no attribute '_AXIS_NAMES'", - ), - ( - lambda x: x.iloc, - AttributeError, - "type object 'NDFrame' has no attribute '_AXIS_NAMES'", - ), - ], -) -def test_ndframe_indexing_raises(idxr, error, error_message): - # GH 25567 - frame = NDFrame(np.random.randint(5, size=(2, 2, 2))) - with pytest.raises(error, match=error_message): - idxr(frame)[0] - - def test_readonly_indices(): # GH#17192 iloc with read-only array raising TypeError df = pd.DataFrame({"data": np.ones(100, dtype="float64")}) From 5ed1a0bc8d4b08838c936d137b542914e62037c0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 08:11:42 -0800 Subject: [PATCH 107/388] CLN: organize MultiIndex indexing tests (#31863) * implement test_take * move test_take, organize tests * cleanup * remove unused imports * 
isort fixup * typo fixup --- pandas/conftest.py | 20 + pandas/tests/indexes/multi/test_indexing.py | 692 ++++++++++---------- 2 files changed, 350 insertions(+), 362 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index d19bf85877140..f7c6a0c899642 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -956,6 +956,25 @@ def __len__(self): return TestNonDictMapping +def _gen_mi(): + # a MultiIndex used to test the general functionality of this object + + # See Also: tests.multi.conftest.idx + major_axis = Index(["foo", "bar", "baz", "qux"]) + minor_axis = Index(["one", "two"]) + + major_codes = np.array([0, 0, 1, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) + index_names = ["first", "second"] + mi = MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=index_names, + verify_integrity=False, + ) + return mi + + indices_dict = { "unicode": tm.makeUnicodeIndex(100), "string": tm.makeStringIndex(100), @@ -972,6 +991,7 @@ def __len__(self): "interval": tm.makeIntervalIndex(100), "empty": Index([]), "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])), + "multi": _gen_mi(), "repeats": Index([0, 0, 1, 1, 2, 2]), } diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 3c9b34a4a1439..39049006edb7c 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -4,116 +4,126 @@ import pytest import pandas as pd -from pandas import ( - Categorical, - CategoricalIndex, - Index, - IntervalIndex, - MultiIndex, - date_range, -) +from pandas import Categorical, Index, MultiIndex, date_range import pandas._testing as tm from pandas.core.indexes.base import InvalidIndexError -def test_slice_locs_partial(idx): - sorted_idx, _ = idx.sortlevel(0) - - result = sorted_idx.slice_locs(("foo", "two"), ("qux", "one")) - assert result == (1, 5) +class TestSliceLocs: + def test_slice_locs_partial(self, idx): + sorted_idx, _ = idx.sortlevel(0) - result = sorted_idx.slice_locs(None, ("qux", "one")) - assert result == (0, 5) + result = sorted_idx.slice_locs(("foo", "two"), ("qux", "one")) + assert result == (1, 5) - result = sorted_idx.slice_locs(("foo", "two"), None) - assert result == (1, len(sorted_idx)) + result = sorted_idx.slice_locs(None, ("qux", "one")) + assert result == (0, 5) - result = sorted_idx.slice_locs("bar", "baz") - assert result == (2, 4) + result = sorted_idx.slice_locs(("foo", "two"), None) + assert result == (1, len(sorted_idx)) + result = sorted_idx.slice_locs("bar", "baz") + assert result == (2, 4) -def test_slice_locs(): - df = tm.makeTimeDataFrame() - stacked = df.stack() - idx = stacked.index + def test_slice_locs(self): + df = tm.makeTimeDataFrame() + stacked = df.stack() + idx = stacked.index - slob = slice(*idx.slice_locs(df.index[5], df.index[15])) - sliced = stacked[slob] - expected = df[5:16].stack() - tm.assert_almost_equal(sliced.values, expected.values) + slob = slice(*idx.slice_locs(df.index[5], df.index[15])) + sliced = stacked[slob] + expected = df[5:16].stack() + tm.assert_almost_equal(sliced.values, expected.values) - slob = slice( - *idx.slice_locs( - df.index[5] + timedelta(seconds=30), df.index[15] - timedelta(seconds=30) + slob = slice( + *idx.slice_locs( + df.index[5] + timedelta(seconds=30), + df.index[15] - timedelta(seconds=30), + ) ) - ) - sliced = stacked[slob] - expected = df[6:15].stack() - tm.assert_almost_equal(sliced.values, expected.values) - - -def 
test_slice_locs_with_type_mismatch(): - df = tm.makeTimeDataFrame() - stacked = df.stack() - idx = stacked.index - with pytest.raises(TypeError, match="^Level type mismatch"): - idx.slice_locs((1, 3)) - with pytest.raises(TypeError, match="^Level type mismatch"): - idx.slice_locs(df.index[5] + timedelta(seconds=30), (5, 2)) - df = tm.makeCustomDataframe(5, 5) - stacked = df.stack() - idx = stacked.index - with pytest.raises(TypeError, match="^Level type mismatch"): - idx.slice_locs(timedelta(seconds=30)) - # TODO: Try creating a UnicodeDecodeError in exception message - with pytest.raises(TypeError, match="^Level type mismatch"): - idx.slice_locs(df.index[1], (16, "a")) - - -def test_slice_locs_not_sorted(): - index = MultiIndex( - levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))], - codes=[ - np.array([0, 0, 1, 2, 2, 2, 3, 3]), - np.array([0, 1, 0, 0, 0, 1, 0, 1]), - np.array([1, 0, 1, 1, 0, 0, 1, 0]), - ], - ) - msg = "[Kk]ey length.*greater than MultiIndex lexsort depth" - with pytest.raises(KeyError, match=msg): - index.slice_locs((1, 0, 1), (2, 1, 0)) + sliced = stacked[slob] + expected = df[6:15].stack() + tm.assert_almost_equal(sliced.values, expected.values) + + def test_slice_locs_with_type_mismatch(self): + df = tm.makeTimeDataFrame() + stacked = df.stack() + idx = stacked.index + with pytest.raises(TypeError, match="^Level type mismatch"): + idx.slice_locs((1, 3)) + with pytest.raises(TypeError, match="^Level type mismatch"): + idx.slice_locs(df.index[5] + timedelta(seconds=30), (5, 2)) + df = tm.makeCustomDataframe(5, 5) + stacked = df.stack() + idx = stacked.index + with pytest.raises(TypeError, match="^Level type mismatch"): + idx.slice_locs(timedelta(seconds=30)) + # TODO: Try creating a UnicodeDecodeError in exception message + with pytest.raises(TypeError, match="^Level type mismatch"): + idx.slice_locs(df.index[1], (16, "a")) + + def test_slice_locs_not_sorted(self): + index = MultiIndex( + levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + ) + msg = "[Kk]ey length.*greater than MultiIndex lexsort depth" + with pytest.raises(KeyError, match=msg): + index.slice_locs((1, 0, 1), (2, 1, 0)) - # works - sorted_index, _ = index.sortlevel(0) - # should there be a test case here??? - sorted_index.slice_locs((1, 0, 1), (2, 1, 0)) + # works + sorted_index, _ = index.sortlevel(0) + # should there be a test case here??? 
+ sorted_index.slice_locs((1, 0, 1), (2, 1, 0)) + def test_slice_locs_not_contained(self): + # some searchsorted action -def test_slice_locs_not_contained(): - # some searchsorted action + index = MultiIndex( + levels=[[0, 2, 4, 6], [0, 2, 4]], + codes=[[0, 0, 0, 1, 1, 2, 3, 3, 3], [0, 1, 2, 1, 2, 2, 0, 1, 2]], + ) - index = MultiIndex( - levels=[[0, 2, 4, 6], [0, 2, 4]], - codes=[[0, 0, 0, 1, 1, 2, 3, 3, 3], [0, 1, 2, 1, 2, 2, 0, 1, 2]], - ) + result = index.slice_locs((1, 0), (5, 2)) + assert result == (3, 6) - result = index.slice_locs((1, 0), (5, 2)) - assert result == (3, 6) + result = index.slice_locs(1, 5) + assert result == (3, 6) - result = index.slice_locs(1, 5) - assert result == (3, 6) + result = index.slice_locs((2, 2), (5, 2)) + assert result == (3, 6) - result = index.slice_locs((2, 2), (5, 2)) - assert result == (3, 6) + result = index.slice_locs(2, 5) + assert result == (3, 6) - result = index.slice_locs(2, 5) - assert result == (3, 6) + result = index.slice_locs((1, 0), (6, 3)) + assert result == (3, 8) - result = index.slice_locs((1, 0), (6, 3)) - assert result == (3, 8) + result = index.slice_locs(-1, 10) + assert result == (0, len(index)) - result = index.slice_locs(-1, 10) - assert result == (0, len(index)) + @pytest.mark.parametrize( + "index_arr,expected,start_idx,end_idx", + [ + ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, None), + ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, "b"), + ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, ("b", "e")), + ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), None), + ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), "c"), + ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), ("c", "e")), + ], + ) + def test_slice_locs_with_missing_value( + self, index_arr, expected, start_idx, end_idx + ): + # issue 19132 + idx = MultiIndex.from_arrays(index_arr) + result = idx.slice_locs(start=start_idx, end=end_idx) + assert result == expected def test_putmask_with_wrong_mask(idx): @@ -130,67 +140,104 @@ def test_putmask_with_wrong_mask(idx): idx.putmask("foo", 1) -def test_get_indexer(): - major_axis = Index(np.arange(4)) - minor_axis = Index(np.arange(2)) +class TestGetIndexer: + def test_get_indexer(self): + major_axis = Index(np.arange(4)) + minor_axis = Index(np.arange(2)) - major_codes = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp) - minor_codes = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp) + major_codes = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp) + minor_codes = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp) - index = MultiIndex( - levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] - ) - idx1 = index[:5] - idx2 = index[[1, 3, 5]] + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + idx1 = index[:5] + idx2 = index[[1, 3, 5]] - r1 = idx1.get_indexer(idx2) - tm.assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp)) + r1 = idx1.get_indexer(idx2) + tm.assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp)) - r1 = idx2.get_indexer(idx1, method="pad") - e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp) - tm.assert_almost_equal(r1, e1) + r1 = idx2.get_indexer(idx1, method="pad") + e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp) + tm.assert_almost_equal(r1, e1) - r2 = idx2.get_indexer(idx1[::-1], method="pad") - tm.assert_almost_equal(r2, e1[::-1]) + r2 = idx2.get_indexer(idx1[::-1], method="pad") + tm.assert_almost_equal(r2, e1[::-1]) - rffill1 = idx2.get_indexer(idx1, method="ffill") - 
tm.assert_almost_equal(r1, rffill1) + rffill1 = idx2.get_indexer(idx1, method="ffill") + tm.assert_almost_equal(r1, rffill1) - r1 = idx2.get_indexer(idx1, method="backfill") - e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp) - tm.assert_almost_equal(r1, e1) + r1 = idx2.get_indexer(idx1, method="backfill") + e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp) + tm.assert_almost_equal(r1, e1) - r2 = idx2.get_indexer(idx1[::-1], method="backfill") - tm.assert_almost_equal(r2, e1[::-1]) + r2 = idx2.get_indexer(idx1[::-1], method="backfill") + tm.assert_almost_equal(r2, e1[::-1]) - rbfill1 = idx2.get_indexer(idx1, method="bfill") - tm.assert_almost_equal(r1, rbfill1) + rbfill1 = idx2.get_indexer(idx1, method="bfill") + tm.assert_almost_equal(r1, rbfill1) - # pass non-MultiIndex - r1 = idx1.get_indexer(idx2.values) - rexp1 = idx1.get_indexer(idx2) - tm.assert_almost_equal(r1, rexp1) + # pass non-MultiIndex + r1 = idx1.get_indexer(idx2.values) + rexp1 = idx1.get_indexer(idx2) + tm.assert_almost_equal(r1, rexp1) - r1 = idx1.get_indexer([1, 2, 3]) - assert (r1 == [-1, -1, -1]).all() + r1 = idx1.get_indexer([1, 2, 3]) + assert (r1 == [-1, -1, -1]).all() - # create index with duplicates - idx1 = Index(list(range(10)) + list(range(10))) - idx2 = Index(list(range(20))) + # create index with duplicates + idx1 = Index(list(range(10)) + list(range(10))) + idx2 = Index(list(range(20))) - msg = "Reindexing only valid with uniquely valued Index objects" - with pytest.raises(InvalidIndexError, match=msg): - idx1.get_indexer(idx2) + msg = "Reindexing only valid with uniquely valued Index objects" + with pytest.raises(InvalidIndexError, match=msg): + idx1.get_indexer(idx2) + def test_get_indexer_nearest(self): + midx = MultiIndex.from_tuples([("a", 1), ("b", 2)]) + msg = ( + "method='nearest' not implemented yet for MultiIndex; " + "see GitHub issue 9365" + ) + with pytest.raises(NotImplementedError, match=msg): + midx.get_indexer(["a"], method="nearest") + msg = "tolerance not implemented yet for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + midx.get_indexer(["a"], method="pad", tolerance=2) + + def test_get_indexer_categorical_time(self): + # https://github.com/pandas-dev/pandas/issues/21390 + midx = MultiIndex.from_product( + [ + Categorical(["a", "b", "c"]), + Categorical(date_range("2012-01-01", periods=3, freq="H")), + ] + ) + result = midx.get_indexer(midx) + tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp)) -def test_get_indexer_nearest(): - midx = MultiIndex.from_tuples([("a", 1), ("b", 2)]) - msg = "method='nearest' not implemented yet for MultiIndex; see GitHub issue 9365" - with pytest.raises(NotImplementedError, match=msg): - midx.get_indexer(["a"], method="nearest") - msg = "tolerance not implemented yet for MultiIndex" - with pytest.raises(NotImplementedError, match=msg): - midx.get_indexer(["a"], method="pad", tolerance=2) + @pytest.mark.parametrize( + "index_arr,labels,expected", + [ + ( + [[1, np.nan, 2], [3, 4, 5]], + [1, np.nan, 2], + np.array([-1, -1, -1], dtype=np.intp), + ), + ([[1, np.nan, 2], [3, 4, 5]], [(np.nan, 4)], np.array([1], dtype=np.intp)), + ([[1, 2, 3], [np.nan, 4, 5]], [(1, np.nan)], np.array([0], dtype=np.intp)), + ( + [[1, 2, 3], [np.nan, 4, 5]], + [np.nan, 4, 5], + np.array([-1, -1, -1], dtype=np.intp), + ), + ], + ) + def test_get_indexer_with_missing_value(self, index_arr, labels, expected): + # issue 19132 + idx = MultiIndex.from_arrays(index_arr) + result = idx.get_indexer(labels) + tm.assert_numpy_array_equal(result, expected) def 
test_getitem(idx): @@ -216,25 +263,6 @@ def test_getitem_group_select(idx): assert sorted_idx.get_loc("foo") == slice(0, 2) -def test_get_indexer_consistency(idx): - # See GH 16819 - if isinstance(idx, IntervalIndex): - pass - - if idx.is_unique or isinstance(idx, CategoricalIndex): - indexer = idx.get_indexer(idx[0:2]) - assert isinstance(indexer, np.ndarray) - assert indexer.dtype == np.intp - else: - e = "Reindexing only valid with uniquely valued Index objects" - with pytest.raises(InvalidIndexError, match=e): - idx.get_indexer(idx[0:2]) - - indexer, _ = idx.get_indexer_non_unique(idx[0:2]) - assert isinstance(indexer, np.ndarray) - assert indexer.dtype == np.intp - - @pytest.mark.parametrize("ind1", [[True] * 5, pd.Index([True] * 5)]) @pytest.mark.parametrize( "ind2", @@ -263,158 +291,155 @@ def test_getitem_bool_index_single(ind1, ind2): tm.assert_index_equal(idx[ind2], expected) -def test_get_loc(idx): - assert idx.get_loc(("foo", "two")) == 1 - assert idx.get_loc(("baz", "two")) == 3 - with pytest.raises(KeyError, match=r"^10$"): - idx.get_loc(("bar", "two")) - with pytest.raises(KeyError, match=r"^'quux'$"): - idx.get_loc("quux") - - msg = "only the default get_loc method is currently supported for MultiIndex" - with pytest.raises(NotImplementedError, match=msg): - idx.get_loc("foo", method="nearest") - - # 3 levels - index = MultiIndex( - levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))], - codes=[ - np.array([0, 0, 1, 2, 2, 2, 3, 3]), - np.array([0, 1, 0, 0, 0, 1, 0, 1]), - np.array([1, 0, 1, 1, 0, 0, 1, 0]), - ], - ) - with pytest.raises(KeyError, match=r"^\(1, 1\)$"): - index.get_loc((1, 1)) - assert index.get_loc((2, 0)) == slice(3, 5) - - -def test_get_loc_duplicates(): - index = Index([2, 2, 2, 2]) - result = index.get_loc(2) - expected = slice(0, 4) - assert result == expected - # pytest.raises(Exception, index.get_loc, 2) - - index = Index(["c", "a", "a", "b", "b"]) - rs = index.get_loc("c") - xp = 0 - assert rs == xp - - -def test_get_loc_level(): - index = MultiIndex( - levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))], - codes=[ - np.array([0, 0, 1, 2, 2, 2, 3, 3]), - np.array([0, 1, 0, 0, 0, 1, 0, 1]), - np.array([1, 0, 1, 1, 0, 0, 1, 0]), - ], - ) - loc, new_index = index.get_loc_level((0, 1)) - expected = slice(1, 2) - exp_index = index[expected].droplevel(0).droplevel(0) - assert loc == expected - assert new_index.equals(exp_index) - - loc, new_index = index.get_loc_level((0, 1, 0)) - expected = 1 - assert loc == expected - assert new_index is None - - with pytest.raises(KeyError, match=r"^\(2, 2\)$"): - index.get_loc_level((2, 2)) - # GH 22221: unused label - with pytest.raises(KeyError, match=r"^2$"): - index.drop(2).get_loc_level(2) - # Unused label on unsorted level: - with pytest.raises(KeyError, match=r"^2$"): - index.drop(1, level=2).get_loc_level(2, level=2) - - index = MultiIndex( - levels=[[2000], list(range(4))], - codes=[np.array([0, 0, 0, 0]), np.array([0, 1, 2, 3])], - ) - result, new_index = index.get_loc_level((2000, slice(None, None))) - expected = slice(None, None) - assert result == expected - assert new_index.equals(index.droplevel(0)) - - -@pytest.mark.parametrize("dtype1", [int, float, bool, str]) -@pytest.mark.parametrize("dtype2", [int, float, bool, str]) -def test_get_loc_multiple_dtypes(dtype1, dtype2): - # GH 18520 - levels = [np.array([0, 1]).astype(dtype1), np.array([0, 1]).astype(dtype2)] - idx = pd.MultiIndex.from_product(levels) - assert idx.get_loc(idx[2]) == 2 - - 
-@pytest.mark.parametrize("level", [0, 1]) -@pytest.mark.parametrize("dtypes", [[int, float], [float, int]]) -def test_get_loc_implicit_cast(level, dtypes): - # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa - levels = [["a", "b"], ["c", "d"]] - key = ["b", "d"] - lev_dtype, key_dtype = dtypes - levels[level] = np.array([0, 1], dtype=lev_dtype) - key[level] = key_dtype(1) - idx = MultiIndex.from_product(levels) - assert idx.get_loc(tuple(key)) == 3 - - -def test_get_loc_cast_bool(): - # GH 19086 : int is casted to bool, but not vice-versa - levels = [[False, True], np.arange(2, dtype="int64")] - idx = MultiIndex.from_product(levels) - - assert idx.get_loc((0, 1)) == 1 - assert idx.get_loc((1, 0)) == 2 - - with pytest.raises(KeyError, match=r"^\(False, True\)$"): - idx.get_loc((False, True)) - with pytest.raises(KeyError, match=r"^\(True, False\)$"): - idx.get_loc((True, False)) - - -@pytest.mark.parametrize("level", [0, 1]) -def test_get_loc_nan(level, nulls_fixture): - # GH 18485 : NaN in MultiIndex - levels = [["a", "b"], ["c", "d"]] - key = ["b", "d"] - levels[level] = np.array([0, nulls_fixture], dtype=type(nulls_fixture)) - key[level] = nulls_fixture - - if nulls_fixture is pd.NA: - pytest.xfail("MultiIndex from pd.NA in np.array broken; see GH 31883") - - idx = MultiIndex.from_product(levels) - assert idx.get_loc(tuple(key)) == 3 - - -def test_get_loc_missing_nan(): - # GH 8569 - idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) - assert isinstance(idx.get_loc(1), slice) - with pytest.raises(KeyError, match=r"^3$"): - idx.get_loc(3) - with pytest.raises(KeyError, match=r"^nan$"): - idx.get_loc(np.nan) - with pytest.raises(TypeError, match="unhashable type: 'list'"): - # listlike/non-hashable raises TypeError - idx.get_loc([np.nan]) - - -def test_get_indexer_categorical_time(): - # https://github.com/pandas-dev/pandas/issues/21390 - midx = MultiIndex.from_product( - [ - Categorical(["a", "b", "c"]), - Categorical(date_range("2012-01-01", periods=3, freq="H")), - ] - ) - result = midx.get_indexer(midx) - tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp)) +class TestGetLoc: + def test_get_loc(self, idx): + assert idx.get_loc(("foo", "two")) == 1 + assert idx.get_loc(("baz", "two")) == 3 + with pytest.raises(KeyError, match=r"^10$"): + idx.get_loc(("bar", "two")) + with pytest.raises(KeyError, match=r"^'quux'$"): + idx.get_loc("quux") + + msg = "only the default get_loc method is currently supported for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + idx.get_loc("foo", method="nearest") + + # 3 levels + index = MultiIndex( + levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + ) + with pytest.raises(KeyError, match=r"^\(1, 1\)$"): + index.get_loc((1, 1)) + assert index.get_loc((2, 0)) == slice(3, 5) + + def test_get_loc_duplicates(self): + index = Index([2, 2, 2, 2]) + result = index.get_loc(2) + expected = slice(0, 4) + assert result == expected + # FIXME: dont leave commented-out + # pytest.raises(Exception, index.get_loc, 2) + + index = Index(["c", "a", "a", "b", "b"]) + rs = index.get_loc("c") + xp = 0 + assert rs == xp + + def test_get_loc_level(self): + index = MultiIndex( + levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), 
+ ], + ) + loc, new_index = index.get_loc_level((0, 1)) + expected = slice(1, 2) + exp_index = index[expected].droplevel(0).droplevel(0) + assert loc == expected + assert new_index.equals(exp_index) + + loc, new_index = index.get_loc_level((0, 1, 0)) + expected = 1 + assert loc == expected + assert new_index is None + + with pytest.raises(KeyError, match=r"^\(2, 2\)$"): + index.get_loc_level((2, 2)) + # GH 22221: unused label + with pytest.raises(KeyError, match=r"^2$"): + index.drop(2).get_loc_level(2) + # Unused label on unsorted level: + with pytest.raises(KeyError, match=r"^2$"): + index.drop(1, level=2).get_loc_level(2, level=2) + + index = MultiIndex( + levels=[[2000], list(range(4))], + codes=[np.array([0, 0, 0, 0]), np.array([0, 1, 2, 3])], + ) + result, new_index = index.get_loc_level((2000, slice(None, None))) + expected = slice(None, None) + assert result == expected + assert new_index.equals(index.droplevel(0)) + + @pytest.mark.parametrize("dtype1", [int, float, bool, str]) + @pytest.mark.parametrize("dtype2", [int, float, bool, str]) + def test_get_loc_multiple_dtypes(self, dtype1, dtype2): + # GH 18520 + levels = [np.array([0, 1]).astype(dtype1), np.array([0, 1]).astype(dtype2)] + idx = pd.MultiIndex.from_product(levels) + assert idx.get_loc(idx[2]) == 2 + + @pytest.mark.parametrize("level", [0, 1]) + @pytest.mark.parametrize("dtypes", [[int, float], [float, int]]) + def test_get_loc_implicit_cast(self, level, dtypes): + # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa + levels = [["a", "b"], ["c", "d"]] + key = ["b", "d"] + lev_dtype, key_dtype = dtypes + levels[level] = np.array([0, 1], dtype=lev_dtype) + key[level] = key_dtype(1) + idx = MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + + def test_get_loc_cast_bool(self): + # GH 19086 : int is casted to bool, but not vice-versa + levels = [[False, True], np.arange(2, dtype="int64")] + idx = MultiIndex.from_product(levels) + + assert idx.get_loc((0, 1)) == 1 + assert idx.get_loc((1, 0)) == 2 + + with pytest.raises(KeyError, match=r"^\(False, True\)$"): + idx.get_loc((False, True)) + with pytest.raises(KeyError, match=r"^\(True, False\)$"): + idx.get_loc((True, False)) + + @pytest.mark.parametrize("level", [0, 1]) + def test_get_loc_nan(self, level, nulls_fixture): + # GH 18485 : NaN in MultiIndex + levels = [["a", "b"], ["c", "d"]] + key = ["b", "d"] + levels[level] = np.array([0, nulls_fixture], dtype=type(nulls_fixture)) + key[level] = nulls_fixture + + if nulls_fixture is pd.NA: + pytest.xfail("MultiIndex from pd.NA in np.array broken; see GH 31883") + + idx = MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + + def test_get_loc_missing_nan(self): + # GH 8569 + idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) + assert isinstance(idx.get_loc(1), slice) + with pytest.raises(KeyError, match=r"^3$"): + idx.get_loc(3) + with pytest.raises(KeyError, match=r"^nan$"): + idx.get_loc(np.nan) + with pytest.raises(TypeError, match="unhashable type: 'list'"): + # listlike/non-hashable raises TypeError + idx.get_loc([np.nan]) + + def test_get_loc_with_values_including_missing_values(self): + # issue 19132 + idx = MultiIndex.from_product([[np.nan, 1]] * 2) + expected = slice(0, 2, None) + assert idx.get_loc(np.nan) == expected + + idx = MultiIndex.from_arrays([[np.nan, 1, 2, np.nan]]) + expected = np.array([True, False, False, True]) + tm.assert_numpy_array_equal(idx.get_loc(np.nan), expected) + + idx = MultiIndex.from_product([[np.nan, 1]] * 3) + expected = 
slice(2, 4, None) + assert idx.get_loc((np.nan, 1)) == expected def test_timestamp_multiindex_indexer(): @@ -444,45 +469,6 @@ def test_timestamp_multiindex_indexer(): tm.assert_series_equal(result, should_be) -def test_get_loc_with_values_including_missing_values(): - # issue 19132 - idx = MultiIndex.from_product([[np.nan, 1]] * 2) - expected = slice(0, 2, None) - assert idx.get_loc(np.nan) == expected - - idx = MultiIndex.from_arrays([[np.nan, 1, 2, np.nan]]) - expected = np.array([True, False, False, True]) - tm.assert_numpy_array_equal(idx.get_loc(np.nan), expected) - - idx = MultiIndex.from_product([[np.nan, 1]] * 3) - expected = slice(2, 4, None) - assert idx.get_loc((np.nan, 1)) == expected - - -@pytest.mark.parametrize( - "index_arr,labels,expected", - [ - ( - [[1, np.nan, 2], [3, 4, 5]], - [1, np.nan, 2], - np.array([-1, -1, -1], dtype=np.intp), - ), - ([[1, np.nan, 2], [3, 4, 5]], [(np.nan, 4)], np.array([1], dtype=np.intp)), - ([[1, 2, 3], [np.nan, 4, 5]], [(1, np.nan)], np.array([0], dtype=np.intp)), - ( - [[1, 2, 3], [np.nan, 4, 5]], - [np.nan, 4, 5], - np.array([-1, -1, -1], dtype=np.intp), - ), - ], -) -def test_get_indexer_with_missing_value(index_arr, labels, expected): - # issue 19132 - idx = MultiIndex.from_arrays(index_arr) - result = idx.get_indexer(labels) - tm.assert_numpy_array_equal(result, expected) - - @pytest.mark.parametrize( "index_arr,expected,target,algo", [ @@ -512,21 +498,3 @@ def test_slice_indexer_with_missing_value(index_arr, expected, start_idx, end_id idx = MultiIndex.from_arrays(index_arr) result = idx.slice_indexer(start=start_idx, end=end_idx) assert result == expected - - -@pytest.mark.parametrize( - "index_arr,expected,start_idx,end_idx", - [ - ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, None), - ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, "b"), - ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, ("b", "e")), - ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), None), - ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), "c"), - ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), ("c", "e")), - ], -) -def test_slice_locs_with_missing_value(index_arr, expected, start_idx, end_idx): - # issue 19132 - idx = MultiIndex.from_arrays(index_arr) - result = idx.slice_locs(start=start_idx, end=end_idx) - assert result == expected From 89d5f876222c82166352e96ad069cbad05a4309b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 08:12:42 -0800 Subject: [PATCH 108/388] REF: standardize CategoricalIndex._shallow_copy usage (#32141) --- pandas/core/indexes/category.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index adb2ed9211bfe..caa6a9a93141f 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -264,10 +264,14 @@ def _simple_new(cls, values, name=None, dtype=None): # -------------------------------------------------------------------- @Appender(Index._shallow_copy.__doc__) - def _shallow_copy(self, values=None, dtype=None, **kwargs): - if dtype is None: - dtype = self.dtype - return super()._shallow_copy(values=values, dtype=dtype, **kwargs) + def _shallow_copy(self, values=None, **kwargs): + if values is None: + values = self.values + + cat = Categorical(values, dtype=self.dtype) + + name = kwargs.get("name", self.name) + return type(self)._simple_new(cat, name=name) def _is_dtype_compat(self, other) -> bool: """ @@ -422,9 +426,9 @@ def 
unique(self, level=None): if level is not None: self._validate_index_level(level) result = self.values.unique() - # CategoricalIndex._shallow_copy keeps original dtype - # if not otherwise specified - return self._shallow_copy(result, dtype=result.dtype) + # Use _simple_new instead of _shallow_copy to ensure we keep dtype + # of result, not self. + return type(self)._simple_new(result, name=self.name) @Appender(Index.duplicated.__doc__) def duplicated(self, keep="first"): @@ -450,7 +454,7 @@ def where(self, cond, other=None): other = self._na_value values = np.where(cond, self.values, other) cat = Categorical(values, dtype=self.dtype) - return self._shallow_copy(cat, **self._get_attributes_dict()) + return self._shallow_copy(cat) def reindex(self, target, method=None, level=None, limit=None, tolerance=None): """ From 25443f0960a31faf855134dc9cdb7ff01123be1d Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Sat, 22 Feb 2020 18:27:05 +0200 Subject: [PATCH 109/388] CLN: Some code cleanups (#32176) --- pandas/_libs/indexing.pyx | 3 +- pandas/_libs/sparse.pyx | 13 +-- pandas/_libs/tslibs/conversion.pyx | 5 +- pandas/_libs/tslibs/resolution.pyx | 41 +++---- pandas/_libs/tslibs/timedeltas.pyx | 177 ++++++++++++++++------------- pandas/_libs/tslibs/timezones.pyx | 14 ++- pandas/_libs/writers.pyx | 26 +++-- 7 files changed, 152 insertions(+), 127 deletions(-) diff --git a/pandas/_libs/indexing.pyx b/pandas/_libs/indexing.pyx index cdccdb504571c..316943edee124 100644 --- a/pandas/_libs/indexing.pyx +++ b/pandas/_libs/indexing.pyx @@ -1,7 +1,6 @@ cdef class _NDFrameIndexerBase: """ - A base class for _NDFrameIndexer for fast instantiation and attribute - access. + A base class for _NDFrameIndexer for fast instantiation and attribute access. 
""" cdef public object obj, name, _ndim diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 4ca053a0ee83a..091ca42cb71dd 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -188,8 +188,7 @@ cdef class IntIndex(SparseIndex): return -1 @cython.wraparound(False) - cpdef ndarray[int32_t] lookup_array(self, ndarray[ - int32_t, ndim=1] indexer): + cpdef ndarray[int32_t] lookup_array(self, ndarray[int32_t, ndim=1] indexer): """ Vectorized lookup, returns ndarray[int32_t] """ @@ -424,12 +423,9 @@ cdef class BlockIndex(SparseIndex): """ Intersect two BlockIndex objects - Parameters - ---------- - Returns ------- - intersection : BlockIndex + BlockIndex """ cdef: BlockIndex y @@ -518,7 +514,7 @@ cdef class BlockIndex(SparseIndex): Returns ------- - union : BlockIndex + BlockIndex """ return BlockUnion(self, y.to_block_index()).result @@ -548,8 +544,7 @@ cdef class BlockIndex(SparseIndex): return -1 @cython.wraparound(False) - cpdef ndarray[int32_t] lookup_array(self, ndarray[ - int32_t, ndim=1] indexer): + cpdef ndarray[int32_t] lookup_array(self, ndarray[int32_t, ndim=1] indexer): """ Vectorized lookup, returns ndarray[int32_t] """ diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 57b4100fbceb0..6e978d495c325 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -84,12 +84,11 @@ def ensure_datetime64ns(arr: ndarray, copy: bool=True): Parameters ---------- arr : ndarray - copy : boolean, default True + copy : bool, default True Returns ------- - result : ndarray with dtype datetime64[ns] - + ndarray with dtype datetime64[ns] """ cdef: Py_ssize_t i, n = arr.size diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 1e0eb7f97ec54..ecf31c15bb72c 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -110,8 +110,8 @@ def get_freq_group(freq) -> int: """ Return frequency code group of given frequency str or offset. - Example - ------- + Examples + -------- >>> get_freq_group('W-MON') 4000 @@ -193,8 +193,8 @@ class Resolution: """ Return resolution str against resolution code. - Example - ------- + Examples + -------- >>> Resolution.get_str(Resolution.RESO_SEC) 'second' """ @@ -205,8 +205,8 @@ class Resolution: """ Return resolution str against resolution code. - Example - ------- + Examples + -------- >>> Resolution.get_reso('second') 2 @@ -220,8 +220,8 @@ class Resolution: """ Return frequency str against resolution str. - Example - ------- + Examples + -------- >>> f.Resolution.get_freq_group('day') 4000 """ @@ -232,8 +232,8 @@ class Resolution: """ Return frequency str against resolution str. - Example - ------- + Examples + -------- >>> f.Resolution.get_freq('day') 'D' """ @@ -244,8 +244,8 @@ class Resolution: """ Return resolution str against frequency str. - Example - ------- + Examples + -------- >>> Resolution.get_str_from_freq('H') 'hour' """ @@ -256,8 +256,8 @@ class Resolution: """ Return resolution code against frequency str. - Example - ------- + Examples + -------- >>> Resolution.get_reso_from_freq('H') 4 @@ -273,8 +273,8 @@ class Resolution: Parameters ---------- - value : integer or float - freq : string + value : int or float + freq : str Frequency string Raises @@ -282,8 +282,8 @@ class Resolution: ValueError If the float cannot be converted to an integer at any resolution. 
- Example - ------- + Examples + -------- >>> Resolution.get_stride_from_decimal(1.5, 'T') (90, 'S') @@ -298,8 +298,9 @@ class Resolution: else: start_reso = cls.get_reso_from_freq(freq) if start_reso == 0: - raise ValueError("Could not convert to integer offset " - "at any resolution") + raise ValueError( + "Could not convert to integer offset at any resolution" + ) next_value = cls._reso_mult_map[start_reso] * value next_name = cls._reso_str_bump_map[freq] diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 3742506a7f8af..66660c5f641fd 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -37,51 +37,61 @@ from pandas._libs.tslibs.offsets import _Tick as Tick # Constants # components named tuple -Components = collections.namedtuple('Components', [ - 'days', 'hours', 'minutes', 'seconds', - 'milliseconds', 'microseconds', 'nanoseconds']) - - -cdef dict timedelta_abbrevs = { 'Y': 'Y', - 'y': 'Y', - 'M': 'M', - 'W': 'W', - 'w': 'W', - 'D': 'D', - 'd': 'D', - 'days': 'D', - 'day': 'D', - 'hours': 'h', - 'hour': 'h', - 'hr': 'h', - 'h': 'h', - 'm': 'm', - 'minute': 'm', - 'min': 'm', - 'minutes': 'm', - 't': 'm', - 's': 's', - 'seconds': 's', - 'sec': 's', - 'second': 's', - 'ms': 'ms', - 'milliseconds': 'ms', - 'millisecond': 'ms', - 'milli': 'ms', - 'millis': 'ms', - 'l': 'ms', - 'us': 'us', - 'microseconds': 'us', - 'microsecond': 'us', - 'micro': 'us', - 'micros': 'us', - 'u': 'us', - 'ns': 'ns', - 'nanoseconds': 'ns', - 'nano': 'ns', - 'nanos': 'ns', - 'nanosecond': 'ns', - 'n': 'ns'} +Components = collections.namedtuple( + "Components", + [ + "days", + "hours", + "minutes", + "seconds", + "milliseconds", + "microseconds", + "nanoseconds", + ], +) + +cdef dict timedelta_abbrevs = { + "Y": "Y", + "y": "Y", + "M": "M", + "W": "W", + "w": "W", + "D": "D", + "d": "D", + "days": "D", + "day": "D", + "hours": "h", + "hour": "h", + "hr": "h", + "h": "h", + "m": "m", + "minute": "m", + "min": "m", + "minutes": "m", + "t": "m", + "s": "s", + "seconds": "s", + "sec": "s", + "second": "s", + "ms": "ms", + "milliseconds": "ms", + "millisecond": "ms", + "milli": "ms", + "millis": "ms", + "l": "ms", + "us": "us", + "microseconds": "us", + "microsecond": "us", + "micro": "us", + "micros": "us", + "u": "us", + "ns": "ns", + "nanoseconds": "ns", + "nano": "ns", + "nanos": "ns", + "nanosecond": "ns", + "n": "ns", +} _no_input = object() @@ -137,9 +147,11 @@ cpdef int64_t delta_to_nanoseconds(delta) except? 
-1: if is_integer_object(delta): return delta if PyDelta_Check(delta): - return (delta.days * 24 * 60 * 60 * 1000000 + - delta.seconds * 1000000 + - delta.microseconds) * 1000 + return ( + delta.days * 24 * 60 * 60 * 1_000_000 + + delta.seconds * 1_000_000 + + delta.microseconds + ) * 1000 raise TypeError(type(delta)) @@ -212,9 +224,8 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'): Py_ssize_t i, n int64_t[:] iresult - if errors not in ('ignore', 'raise', 'coerce'): - raise ValueError("errors must be one of 'ignore', " - "'raise', or 'coerce'}") + if errors not in {'ignore', 'raise', 'coerce'}: + raise ValueError("errors must be one of {'ignore', 'raise', or 'coerce'}") n = values.shape[0] result = np.empty(n, dtype='m8[ns]') @@ -255,34 +266,34 @@ cpdef inline object precision_from_unit(object unit): int p if unit == 'Y': - m = 1000000000L * 31556952 + m = 1000000000 * 31556952 p = 9 elif unit == 'M': - m = 1000000000L * 2629746 + m = 1000000000 * 2629746 p = 9 elif unit == 'W': - m = 1000000000L * DAY_SECONDS * 7 + m = 1000000000 * DAY_SECONDS * 7 p = 9 elif unit == 'D' or unit == 'd': - m = 1000000000L * DAY_SECONDS + m = 1000000000 * DAY_SECONDS p = 9 elif unit == 'h': - m = 1000000000L * 3600 + m = 1000000000 * 3600 p = 9 elif unit == 'm': - m = 1000000000L * 60 + m = 1000000000 * 60 p = 9 elif unit == 's': - m = 1000000000L + m = 1000000000 p = 9 elif unit == 'ms': - m = 1000000L + m = 1000000 p = 6 elif unit == 'us': - m = 1000L + m = 1000 p = 3 elif unit == 'ns' or unit is None: - m = 1L + m = 1 p = 0 else: raise ValueError(f"cannot cast unit {unit}") @@ -383,13 +394,13 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1: if len(number): if current_unit is None: current_unit = 'h' - m = 1000000000L * 3600 + m = 1000000000 * 3600 elif current_unit == 'h': current_unit = 'm' - m = 1000000000L * 60 + m = 1000000000 * 60 elif current_unit == 'm': current_unit = 's' - m = 1000000000L + m = 1000000000 r = int(''.join(number)) * m result += timedelta_as_neg(r, neg) have_hhmmss = 1 @@ -408,7 +419,7 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1: # hh:mm:ss (so current_unit is 'm') if current_unit != 'm': raise ValueError("expected hh:mm:ss format before .") - m = 1000000000L + m = 1000000000 r = int(''.join(number)) * m result += timedelta_as_neg(r, neg) have_value = 1 @@ -437,9 +448,9 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1: raise ValueError("no units specified") if len(frac) > 0 and len(frac) <= 3: - m = 10**(3 -len(frac)) * 1000L * 1000L + m = 10**(3 -len(frac)) * 1000 * 1000 elif len(frac) > 3 and len(frac) <= 6: - m = 10**(6 -len(frac)) * 1000L + m = 10**(6 -len(frac)) * 1000 else: m = 10**(9 -len(frac)) @@ -451,7 +462,7 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1: elif current_unit is not None: if current_unit != 'm': raise ValueError("expected hh:mm:ss format") - m = 1000000000L + m = 1000000000 r = int(''.join(number)) * m result += timedelta_as_neg(r, neg) @@ -1018,6 +1029,7 @@ cdef class _Timedelta(timedelta): **Using string input** >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') + >>> td.nanoseconds 42 @@ -1095,7 +1107,7 @@ cdef class _Timedelta(timedelta): Returns ------- - formatted : str + str See Also -------- @@ -1115,6 +1127,7 @@ cdef class _Timedelta(timedelta): -------- >>> td = pd.Timedelta(days=6, minutes=50, seconds=3, ... 
milliseconds=10, microseconds=10, nanoseconds=12) + >>> td.isoformat() 'P6DT0H50M3.010010012S' >>> pd.Timedelta(hours=1, seconds=10).isoformat() @@ -1190,10 +1203,12 @@ class Timedelta(_Timedelta): value = nano + convert_to_timedelta64(timedelta(**kwargs), 'ns') except TypeError as e: - raise ValueError("cannot construct a Timedelta from the " - "passed arguments, allowed keywords are " - "[weeks, days, hours, minutes, seconds, " - "milliseconds, microseconds, nanoseconds]") + raise ValueError( + "cannot construct a Timedelta from the passed arguments, " + "allowed keywords are " + "[weeks, days, hours, minutes, seconds, " + "milliseconds, microseconds, nanoseconds]" + ) if unit in {'Y', 'y', 'M'}: raise ValueError( @@ -1230,8 +1245,9 @@ class Timedelta(_Timedelta): return NaT else: raise ValueError( - f"Value must be Timedelta, string, integer, " - f"float, timedelta or convertible, not {type(value).__name__}") + "Value must be Timedelta, string, integer, " + f"float, timedelta or convertible, not {type(value).__name__}" + ) if is_timedelta64_object(value): value = value.view('i8') @@ -1509,10 +1525,13 @@ cdef _rfloordiv(int64_t value, right): return right // value -cdef _broadcast_floordiv_td64(int64_t value, object other, - object (*operation)(int64_t value, - object right)): - """Boilerplate code shared by Timedelta.__floordiv__ and +cdef _broadcast_floordiv_td64( + int64_t value, + object other, + object (*operation)(int64_t value, object right) +): + """ + Boilerplate code shared by Timedelta.__floordiv__ and Timedelta.__rfloordiv__ because np.timedelta64 does not implement these. Parameters diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 07947f6677c04..0ec3e2ad467e1 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -2,9 +2,11 @@ from datetime import timezone # dateutil compat from dateutil.tz import ( - tzutc as _dateutil_tzutc, + tzfile as _dateutil_tzfile, tzlocal as _dateutil_tzlocal, - tzfile as _dateutil_tzfile) + tzutc as _dateutil_tzutc, +) + from dateutil.tz import gettz as dateutil_gettz @@ -103,7 +105,9 @@ cpdef inline object maybe_get_tz(object tz): def _p_tz_cache_key(tz): - """ Python interface for cache function to facilitate testing.""" + """ + Python interface for cache function to facilitate testing. + """ return tz_cache_key(tz) @@ -120,7 +124,7 @@ cdef inline object tz_cache_key(object tz): dateutil timezones. Notes - ===== + ----- This cannot just be the hash of a timezone object. Unfortunately, the hashes of two dateutil tz objects which represent the same timezone are not equal (even though the tz objects will compare equal and represent @@ -313,7 +317,7 @@ cpdef bint tz_compare(object start, object end): Returns: ------- - compare : bint + bool """ # GH 18523 diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index 73201e75c3c88..9e95dea979577 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -15,8 +15,13 @@ ctypedef fused pandas_string: @cython.boundscheck(False) @cython.wraparound(False) -def write_csv_rows(list data, ndarray data_index, - Py_ssize_t nlevels, ndarray cols, object writer): +def write_csv_rows( + list data, + ndarray data_index, + Py_ssize_t nlevels, + ndarray cols, + object writer +): """ Write the given data to the writer object, pre-allocating where possible for performance improvements. 
@@ -114,7 +119,9 @@ def convert_json_to_lines(arr: object) -> str: @cython.boundscheck(False) @cython.wraparound(False) def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t: - """ return the maximum size of elements in a 1-dim string array """ + """ + Return the maximum size of elements in a 1-dim string array. + """ cdef: Py_ssize_t i, m = 0, l = 0, length = arr.shape[0] pandas_string val @@ -130,7 +137,9 @@ def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t: cpdef inline Py_ssize_t word_len(object val): - """ return the maximum length of a string or bytes value """ + """ + Return the maximum length of a string or bytes value. + """ cdef: Py_ssize_t l = 0 @@ -148,8 +157,10 @@ cpdef inline Py_ssize_t word_len(object val): @cython.boundscheck(False) @cython.wraparound(False) def string_array_replace_from_nan_rep( - ndarray[object, ndim=1] arr, object nan_rep, - object replace=None): + ndarray[object, ndim=1] arr, + object nan_rep, + object replace=np.nan +): """ Replace the values in the array with 'replacement' if they are 'nan_rep'. Return the same array. @@ -157,9 +168,6 @@ def string_array_replace_from_nan_rep( cdef: Py_ssize_t length = len(arr), i = 0 - if replace is None: - replace = np.nan - for i in range(length): if arr[i] == nan_rep: arr[i] = replace From 00e8e4ab0c5e4c7bfb3e356e660d9f088d4a82a4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 08:31:17 -0800 Subject: [PATCH 110/388] BUG: disallow invalid dtype to CategoricalDtype._from_values_or_dtype (#32169) --- pandas/core/dtypes/dtypes.py | 2 ++ pandas/tests/dtypes/test_dtypes.py | 5 +++++ pandas/tests/indexes/common.py | 3 ++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index d93ad973ff02d..0730de934b56c 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -324,6 +324,8 @@ def _from_values_or_dtype( raise ValueError( "Cannot specify `categories` or `ordered` together with `dtype`." 
) + elif not isinstance(dtype, CategoricalDtype): + raise ValueError(f"Cannot construct CategoricalDtype from {dtype}") elif is_categorical(values): # If no "dtype" was passed, use the one from "values", but honor # the "ordered" and "categories" arguments diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index dd99b81fb6764..9eb5fda87d2d2 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -127,6 +127,11 @@ def test_from_values_or_dtype_raises(self, values, categories, ordered, dtype): with pytest.raises(ValueError, match=msg): CategoricalDtype._from_values_or_dtype(values, categories, ordered, dtype) + def test_from_values_or_dtype_invalid_dtype(self): + msg = "Cannot construct CategoricalDtype from " + with pytest.raises(ValueError, match=msg): + CategoricalDtype._from_values_or_dtype(None, None, None, object) + def test_is_dtype(self, dtype): assert CategoricalDtype.is_dtype(dtype) assert CategoricalDtype.is_dtype("category") diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 2073aa0727809..c9e762af3a303 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -605,7 +605,8 @@ def test_equals(self, indices): assert not indices.equals(np.array(indices)) # Cannot pass in non-int64 dtype to RangeIndex - if not isinstance(indices, RangeIndex): + if not isinstance(indices, (RangeIndex, CategoricalIndex)): + # TODO: CategoricalIndex can be re-allowed following GH#32167 same_values = Index(indices, dtype=object) assert indices.equals(same_values) assert same_values.equals(indices) From 80387ae95cc3e2b39ab75daf5f30c37c2c976057 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 08:31:38 -0800 Subject: [PATCH 111/388] REF: de-duplicate object-dtype handling (#32168) --- pandas/core/indexes/base.py | 72 +++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 31 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f9f343dd9056a..b1ac17361b2b2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -311,14 +311,7 @@ def __new__( # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas.core.indexes.interval import IntervalIndex - closed = kwargs.pop("closed", None) - if is_dtype_equal(_o_dtype, dtype): - return IntervalIndex( - data, name=name, copy=copy, closed=closed, **kwargs - ).astype(object) - return IntervalIndex( - data, dtype=dtype, name=name, copy=copy, closed=closed, **kwargs - ) + return _maybe_asobject(dtype, IntervalIndex, data, copy, name, **kwargs) elif ( is_datetime64_any_dtype(data) @@ -328,39 +321,19 @@ def __new__( # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas import DatetimeIndex - if is_dtype_equal(_o_dtype, dtype): - # GH#23524 passing `dtype=object` to DatetimeIndex is invalid, - # will raise in the where `data` is already tz-aware. So - # we leave it out of this step and cast to object-dtype after - # the DatetimeIndex construction. - # Note we can pass copy=False because the .astype below - # will always make a copy - return DatetimeIndex(data, copy=False, name=name, **kwargs).astype( - object - ) - else: - return DatetimeIndex(data, copy=copy, name=name, dtype=dtype, **kwargs) + return _maybe_asobject(dtype, DatetimeIndex, data, copy, name, **kwargs) elif is_timedelta64_dtype(data) or is_timedelta64_dtype(dtype): # Delay import for perf.
https://github.com/pandas-dev/pandas/pull/31423 from pandas import TimedeltaIndex - if is_dtype_equal(_o_dtype, dtype): - # Note we can pass copy=False because the .astype below - # will always make a copy - return TimedeltaIndex(data, copy=False, name=name, **kwargs).astype( - object - ) - else: - return TimedeltaIndex(data, copy=copy, name=name, dtype=dtype, **kwargs) + return _maybe_asobject(dtype, TimedeltaIndex, data, copy, name, **kwargs) elif is_period_dtype(data) or is_period_dtype(dtype): # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas import PeriodIndex - if is_dtype_equal(_o_dtype, dtype): - return PeriodIndex(data, copy=False, name=name, **kwargs).astype(object) - return PeriodIndex(data, dtype=dtype, copy=copy, name=name, **kwargs) + return _maybe_asobject(dtype, PeriodIndex, data, copy, name, **kwargs) # extension dtype elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype): @@ -5775,3 +5748,40 @@ def _try_convert_to_int_array( pass raise ValueError + + +def _maybe_asobject(dtype, klass, data, copy: bool, name: Label, **kwargs): + """ + If an object dtype was specified, create the non-object Index + and then convert it to object. + + Parameters + ---------- + dtype : np.dtype, ExtensionDtype, str + klass : Index subclass + data : list-like + copy : bool + name : hashable + **kwargs + + Returns + ------- + Index + + Notes + ----- + We assume that calling .astype(object) on this klass will make a copy. + """ + + # GH#23524 passing `dtype=object` to DatetimeIndex is invalid, + # will raise in the case where `data` is already tz-aware. So + # we leave it out of this step and cast to object-dtype after + # the DatetimeIndex construction. + + if is_dtype_equal(_o_dtype, dtype): + # Note we can pass copy=False because the .astype below + # will always make a copy + index = klass(data, copy=False, name=name, **kwargs) + return index.astype(object) + + return klass(data, dtype=dtype, copy=copy, name=name, **kwargs) From f9b49c8ee4d711c721b41496f3e8d3d25d73231e Mon Sep 17 00:00:00 2001 From: SAI SRAVAN MEDICHERLA <51117801+PSY27@users.noreply.github.com> Date: Sat, 22 Feb 2020 22:03:46 +0530 Subject: [PATCH 112/388] used f-string (#32133) --- doc/source/user_guide/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index bd19b35e8d9e8..c34247a49335d 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3815,7 +3815,7 @@ The right-hand side of the sub-expression (after a comparison operator) can be: ..
code-block:: ipython string = "HolyMoly'" - store.select('df', 'index == %s' % string) + store.select('df', f'index == {string}') The latter will **not** work and will raise a ``SyntaxError``. Note that there's a single quote followed by a double quote in the ``string`` From 020dcce17e3bd0983fca5b02556bd431140ab371 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 09:53:08 -0800 Subject: [PATCH 113/388] BUG: using loc[int] with object index (#31905) * BUG: using loc[int] with object index * whatsnew --- doc/source/whatsnew/v1.1.0.rst | 4 ++++ pandas/core/indexes/base.py | 2 +- pandas/core/series.py | 4 +++- pandas/tests/indexing/test_categorical.py | 6 +----- pandas/tests/indexing/test_loc.py | 12 ++++++++++-- pandas/tests/indexing/test_scalar.py | 18 +++++------------- 6 files changed, 24 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 34a67836f9675..7449c62a5ad31 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -56,6 +56,8 @@ Other API changes - :meth:`Series.describe` will now show distribution percentiles for ``datetime`` dtypes, statistics ``first`` and ``last`` will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`) - :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`) +- ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`) +- Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -160,6 +162,8 @@ Indexing - Bug in :meth:`DatetimeIndex.get_loc` raising ``KeyError`` with converted-integer key instead of the user-passed key (:issue:`31425`) - Bug in :meth:`Series.xs` incorrectly returning ``Timestamp`` instead of ``datetime64`` in some object-dtype cases (:issue:`31630`) - Bug in :meth:`DataFrame.iat` incorrectly returning ``Timestamp`` instead of ``datetime`` in some object-dtype cases (:issue:`32809`) +- Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` when indexing with an integer key on an object-dtype :class:`Index` that is not all-integers (:issue:`31905`) +- Missing ^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b1ac17361b2b2..c896e68f7a188 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3111,7 +3111,7 @@ def _convert_scalar_indexer(self, key, kind: str_t): self._invalid_indexer("label", key) elif kind == "loc" and is_integer(key): - if not self.holds_integer(): + if not (is_integer_dtype(self.dtype) or is_object_dtype(self.dtype)): self._invalid_indexer("label", key) return key diff --git a/pandas/core/series.py b/pandas/core/series.py index 2182374337c84..77f9df0f4e933 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -969,9 +969,11 @@ def _get_value(self, label, takeable: bool = False): if takeable: return self._values[label] + # Similar to Index.get_value, but we do not fall back to positional + loc = self.index.get_loc(label) # We assume that _convert_scalar_indexer has already been called, # with kind="loc", if necessary, by the time we get here - return self.index.get_value(self, label) + return self.index._get_values_for_loc(self, loc, label) def __setitem__(self, key, value): key = com.apply_if_callable(key, self) diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index
da935b1c911d0..8a8ac584c16c2 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -82,11 +82,7 @@ def test_loc_scalar(self): with pytest.raises(TypeError, match=msg): df.loc["d", "C"] = 10 - msg = ( - "cannot do label indexing on CategoricalIndex with these " - r"indexers \[1\] of type int" - ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^1$"): df.loc[1] def test_getitem_scalar(self): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 71d85ed8bda9b..276d11a67ad18 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -16,7 +16,7 @@ class TestLoc(Base): def test_loc_getitem_int(self): # int label - self.check_result("loc", 2, typs=["labels"], fails=TypeError) + self.check_result("loc", 2, typs=["labels"], fails=KeyError) def test_loc_getitem_label(self): @@ -34,7 +34,7 @@ def test_loc_getitem_label_out_of_range(self): self.check_result( "loc", 20, typs=["ints", "uints", "mixed"], fails=KeyError, ) - self.check_result("loc", 20, typs=["labels"], fails=TypeError) + self.check_result("loc", 20, typs=["labels"], fails=KeyError) self.check_result("loc", 20, typs=["ts"], axes=0, fails=TypeError) self.check_result("loc", 20, typs=["floats"], axes=0, fails=KeyError) @@ -967,3 +967,11 @@ def test_loc_set_dataframe_multiindex(): result = expected.copy() result.loc[0, [(0, 1)]] = result.loc[0, [(0, 1)]] tm.assert_frame_equal(result, expected) + + +def test_loc_mixed_int_float(): + # GH#19456 + ser = pd.Series(range(2), pd.Index([1, 2.0], dtype=object)) + + result = ser.loc[1] + assert result == 0 diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index c4750778e2eb8..25939e63c256b 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -138,16 +138,12 @@ def test_series_at_raises_type_error(self): result = ser.loc["a"] assert result == 1 - msg = ( - "cannot do label indexing on Index " - r"with these indexers \[0\] of type int" - ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^0$"): ser.at[0] - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^0$"): ser.loc[0] - def test_frame_raises_type_error(self): + def test_frame_raises_key_error(self): # GH#31724 .at should match .loc df = DataFrame({"A": [1, 2, 3]}, index=list("abc")) result = df.at["a", "A"] @@ -155,13 +151,9 @@ def test_frame_raises_type_error(self): result = df.loc["a", "A"] assert result == 1 - msg = ( - "cannot do label indexing on Index " - r"with these indexers \[0\] of type int" - ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^0$"): df.at["a", 0] - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^0$"): df.loc["a", 0] def test_series_at_raises_key_error(self): From b2ebd5ae14580dde793e40097c6a283d82c69ad9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 09:53:45 -0800 Subject: [PATCH 114/388] dont skip keyerror for IntervalIndex (#31936) --- pandas/core/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c53e690b5f46c..1644b4203052b 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1302,12 +1302,12 @@ def _validate_read_indexer( not_found = list(set(key) - set(ax)) raise KeyError(f"{not_found} not in index") - # we skip the warning on 
Categorical/Interval + # we skip the warning on Categorical # as this check is actually done (check for # non-missing values), but a bit later in the # code, so we want to avoid warning & then # just raising - if not (ax.is_categorical() or ax.is_interval()): + if not ax.is_categorical(): raise KeyError( "Passing list-likes to .loc or [] with any missing labels " "is no longer supported, see " From 016482a74d8bf572d080cb646115e7648ac606c9 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Sat, 22 Feb 2020 21:59:05 +0100 Subject: [PATCH 115/388] Turned class based tests into function based tests (#32184) --- pandas/tests/series/test_validate.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/pandas/tests/series/test_validate.py b/pandas/tests/series/test_validate.py index c4311f507f7ee..511d24ca7fa29 100644 --- a/pandas/tests/series/test_validate.py +++ b/pandas/tests/series/test_validate.py @@ -1,20 +1,18 @@ import pytest -class TestSeriesValidate: +@pytest.mark.parametrize( + "func", + ["reset_index", "_set_name", "sort_values", "sort_index", "rename", "dropna"], +) +@pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0]) +def test_validate_bool_args(string_series, func, inplace): """Tests for error handling related to data types of method arguments.""" + msg = 'For argument "inplace" expected type bool' + kwargs = dict(inplace=inplace) - @pytest.mark.parametrize( - "func", - ["reset_index", "_set_name", "sort_values", "sort_index", "rename", "dropna"], - ) - @pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0]) - def test_validate_bool_args(self, string_series, func, inplace): - msg = 'For argument "inplace" expected type bool' - kwargs = dict(inplace=inplace) + if func == "_set_name": + kwargs["name"] = "hello" - if func == "_set_name": - kwargs["name"] = "hello" - - with pytest.raises(ValueError, match=msg): - getattr(string_series, func)(**kwargs) + with pytest.raises(ValueError, match=msg): + getattr(string_series, func)(**kwargs) From dbc446ad3e9d3bf52c01ec1088ef5f3b48bfef89 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 13:07:29 -0800 Subject: [PATCH 116/388] CLN: no kwargs for take_with_is_copy (#32181) --- pandas/core/generic.py | 6 ++---- pandas/core/series.py | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 40e59f04192a6..a6ab0d4034ddb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3262,9 +3262,7 @@ class max_speed ) return self._constructor(new_data).__finalize__(self) - def _take_with_is_copy( - self: FrameOrSeries, indices, axis=0, **kwargs - ) -> FrameOrSeries: + def _take_with_is_copy(self: FrameOrSeries, indices, axis=0) -> FrameOrSeries: """ Internal version of the `take` method that sets the `_is_copy` attribute to keep track of the parent dataframe (using in indexing @@ -3272,7 +3270,7 @@ def _take_with_is_copy( See the docstring of `take` for full explanation of the parameters. """ - result = self.take(indices=indices, axis=axis, **kwargs) + result = self.take(indices=indices, axis=axis) # Maybe set copy if we didn't actually change the index. 
if not result._get_axis(axis).equals(self._get_axis(axis)): result._set_is_copy(self) diff --git a/pandas/core/series.py b/pandas/core/series.py index 77f9df0f4e933..3ded02598963c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -812,7 +812,7 @@ def take(self, indices, axis=0, is_copy=None, **kwargs) -> "Series": new_values, index=new_index, fastpath=True ).__finalize__(self) - def _take_with_is_copy(self, indices, axis=0, **kwargs): + def _take_with_is_copy(self, indices, axis=0): """ Internal version of the `take` method that sets the `_is_copy` attribute to keep track of the parent dataframe (using in indexing @@ -821,7 +821,7 @@ def _take_with_is_copy(self, indices, axis=0, **kwargs): See the docstring of `take` for full explanation of the parameters. """ - return self.take(indices=indices, axis=axis, **kwargs) + return self.take(indices=indices, axis=axis) def _ixs(self, i: int, axis: int = 0): """ From ab56348a32a40a65d2e42b5e93d7452f986e000d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20=C4=8Cervenka?= Date: Sun, 23 Feb 2020 02:00:57 +0100 Subject: [PATCH 117/388] CLN: F-string in pandas/tests/indexes/datetimes/test_to_period.py (#29547) (#32189) --- pandas/tests/indexes/datetimes/test_to_period.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_to_period.py b/pandas/tests/indexes/datetimes/test_to_period.py index 5567f98c52211..ddbb43787abb4 100644 --- a/pandas/tests/indexes/datetimes/test_to_period.py +++ b/pandas/tests/indexes/datetimes/test_to_period.py @@ -43,7 +43,7 @@ def test_dti_to_period(self): @pytest.mark.parametrize("month", MONTHS) def test_to_period_quarterly(self, month): # make sure we can make the round trip - freq = "Q-{month}".format(month=month) + freq = f"Q-{month}" rng = period_range("1989Q3", "1991Q3", freq=freq) stamps = rng.to_timestamp() result = stamps.to_period(freq) From 41bc226841eb59ccdfa279734dac98f7debc6249 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Sun, 23 Feb 2020 08:57:07 -0600 Subject: [PATCH 118/388] BUG: Fix construction of Categorical from pd.NA (#31939) --- doc/source/whatsnew/v1.0.2.rst | 3 ++- pandas/_libs/hashtable_class_helper.pxi.in | 9 +++++++-- pandas/tests/arrays/categorical/test_constructors.py | 12 ++++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index affe019d0ac86..dc47e010dacdc 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -65,6 +65,7 @@ Bug fixes **Categorical** - Fixed bug where :meth:`Categorical.from_codes` improperly raised a ``ValueError`` when passed nullable integer codes. (:issue:`31779`) +- Fixed bug where :meth:`Categorical` constructor would raise a ``TypeError`` when given a numpy array containing ``pd.NA``. (:issue:`31927`) - Bug in :class:`Categorical` that would ignore or crash when calling :meth:`Series.replace` with a list-like ``to_replace`` (:issue:`31720`) **I/O** @@ -85,4 +86,4 @@ Bug fixes Contributors ~~~~~~~~~~~~ -.. contributors:: v1.0.1..v1.0.2|HEAD \ No newline at end of file +.. 
contributors:: v1.0.1..v1.0.2|HEAD diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 6671375f628e7..811025a4b5764 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -10,6 +10,7 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in # ---------------------------------------------------------------------- from pandas._libs.tslibs.util cimport get_c_string +from pandas._libs.missing cimport C_NA {{py: @@ -1032,8 +1033,12 @@ cdef class PyObjectHashTable(HashTable): val = values[i] hash(val) - if ignore_na and ((val != val or val is None) - or (use_na_value and val == na_value)): + if ignore_na and ( + (val is C_NA) + or (val != val) + or (val is None) + or (use_na_value and val == na_value) + ): # if missing values do not count as unique values (i.e. if # ignore_na is True), skip the hashtable entry for them, and # replace the corresponding label with na_sentinel diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index dbd8fd8df67c1..d5537359d6948 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -458,6 +458,18 @@ def test_constructor_with_categorical_categories(self): result = Categorical(["a", "b"], categories=CategoricalIndex(["a", "b", "c"])) tm.assert_categorical_equal(result, expected) + @pytest.mark.parametrize("klass", [lambda x: np.array(x, dtype=object), list]) + def test_construction_with_null(self, klass, nulls_fixture): + # https://github.com/pandas-dev/pandas/issues/31927 + values = klass(["a", nulls_fixture, "b"]) + result = Categorical(values) + + dtype = CategoricalDtype(["a", "b"]) + codes = [0, -1, 1] + expected = Categorical.from_codes(codes=codes, dtype=dtype) + + tm.assert_categorical_equal(result, expected) + def test_from_codes(self): # too few categories From 20a84a5d8f554ecd52490e3111fd016d7b71a0e7 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Sun, 23 Feb 2020 08:58:59 -0600 Subject: [PATCH 119/388] BUG: Avoid ambiguous condition in GroupBy.first / last (#32124) --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/_libs/groupby.pyx | 6 +++-- pandas/tests/groupby/test_nth.py | 40 ++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index dc47e010dacdc..9a7a1c92451bc 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -77,6 +77,7 @@ Bug fixes - Fix bug in :meth:`DataFrame.convert_dtypes` for columns that were already using the ``"string"`` dtype (:issue:`31731`). - Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`) +- Fixed bug where :meth:`GroupBy.first` and :meth:`GroupBy.last` would raise a ``TypeError`` when groups contained ``pd.NA`` in a column of object dtype (:issue:`32123`) - Fix bug in :meth:`Series.convert_dtypes` for series with mix of integers and strings (:issue:`32117`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index edc44f1c94589..27b3095d8cb4f 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -22,6 +22,8 @@ from pandas._libs.algos cimport (swap, TiebreakEnumType, TIEBREAK_AVERAGE, from pandas._libs.algos import (take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers) +from pandas._libs.missing cimport checknull + cdef int64_t NPY_NAT = get_nat() _int64_max = np.iinfo(np.int64).max @@ -887,7 +889,7 @@ def group_last(rank_t[:, :] out, for j in range(K): val = values[i, j] - if val == val: + if not checknull(val): # NB: use _treat_as_na here once # conditional-nogil is available. nobs[lab, j] += 1 @@ -976,7 +978,7 @@ def group_nth(rank_t[:, :] out, for j in range(K): val = values[i, j] - if val == val: + if not checknull(val): # NB: use _treat_as_na here once # conditional-nogil is available. nobs[lab, j] += 1 diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index 0f850f2e94581..b1476f1059d84 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -54,6 +54,46 @@ def test_first_last_nth(df): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize("method", ["first", "last"]) +def test_first_last_with_na_object(method, nulls_fixture): + # https://github.com/pandas-dev/pandas/issues/32123 + groups = pd.DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 3, nulls_fixture]}).groupby( + "a" + ) + result = getattr(groups, method)() + + if method == "first": + values = [1, 3] + else: + values = [2, 3] + + values = np.array(values, dtype=result["b"].dtype) + idx = pd.Index([1, 2], name="a") + expected = pd.DataFrame({"b": values}, index=idx) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("index", [0, -1]) +def test_nth_with_na_object(index, nulls_fixture): + # https://github.com/pandas-dev/pandas/issues/32123 + groups = pd.DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 3, nulls_fixture]}).groupby( + "a" + ) + result = groups.nth(index) + + if index == 0: + values = [1, 3] + else: + values = [2, nulls_fixture] + + values = np.array(values, dtype=result["b"].dtype) + idx = pd.Index([1, 2], name="a") + expected = pd.DataFrame({"b": values}, index=idx) + + tm.assert_frame_equal(result, expected) + + def test_first_last_nth_dtypes(df_mixed_floats): df = df_mixed_floats.copy() From 7017599821e02ba95282848c12f7d3b5f2ce670a Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 23 Feb 2020 15:02:16 +0000 Subject: [PATCH 120/388] BUG: groupby nunique changing values (#32175) --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/core/groupby/generic.py | 24 ++++++------------------ pandas/tests/groupby/test_function.py | 2 ++ 3 files changed, 9 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 9a7a1c92451bc..f491774991090 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -19,6 +19,7 @@ Fixed regressions - Fixed regression in :meth:`Series.align` when ``other`` is a DataFrame and ``method`` is not None (:issue:`31785`) - Fixed regression in :meth:`pandas.core.groupby.RollingGroupby.apply` where the ``raw`` parameter was ignored (:issue:`31754`) - Fixed regression in :meth:`rolling(..).corr() ` when using a time offset (:issue:`31789`) +- Fixed regression in :meth:`DataFrameGroupBy.nunique` which was modifying 
the original values if ``NaN`` values were present (:issue:`31950`) - Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`). - Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) - Fixed regression in :meth:`GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 37b6429167646..1bb512aee39e2 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -591,30 +591,18 @@ def nunique(self, dropna: bool = True) -> Series: val = self.obj._internal_get_values() - # GH 27951 - # temporary fix while we wait for NumPy bug 12629 to be fixed - val[isna(val)] = np.datetime64("NaT") - - try: - sorter = np.lexsort((val, ids)) - except TypeError: # catches object dtypes - msg = f"val.dtype must be object, got {val.dtype}" - assert val.dtype == object, msg - val, _ = algorithms.factorize(val, sort=False) - sorter = np.lexsort((val, ids)) - _isna = lambda a: a == -1 - else: - _isna = isna - - ids, val = ids[sorter], val[sorter] + codes, _ = algorithms.factorize(val, sort=False) + sorter = np.lexsort((codes, ids)) + codes = codes[sorter] + ids = ids[sorter] # group boundaries are where group ids change # unique observations are where sorted values change idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]] - inc = np.r_[1, val[1:] != val[:-1]] + inc = np.r_[1, codes[1:] != codes[:-1]] # 1st item of each group is a new unique observation - mask = _isna(val) + mask = codes == -1 if dropna: inc[idx] = 1 inc[mask] = 0 diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 6205dfb87bbd0..c402ca194648f 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1017,6 +1017,7 @@ def test_frame_describe_unstacked_format(): @pytest.mark.parametrize("dropna", [False, True]) def test_series_groupby_nunique(n, m, sort, dropna): def check_nunique(df, keys, as_index=True): + original_df = df.copy() gr = df.groupby(keys, as_index=as_index, sort=sort) left = gr["julie"].nunique(dropna=dropna) @@ -1026,6 +1027,7 @@ def check_nunique(df, keys, as_index=True): right = right.reset_index(drop=True) tm.assert_series_equal(left, right, check_names=False) + tm.assert_frame_equal(df, original_df) days = date_range("2015-08-23", periods=10) From 7b99f035672c54656e770b0bf317d24d7bb9a7ba Mon Sep 17 00:00:00 2001 From: Mabroor Ahmed Date: Sun, 23 Feb 2020 15:16:21 +0000 Subject: [PATCH 121/388] TST: Fixed bare pytest.raises in test_window.py (#32158) --- pandas/tests/window/test_window.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/window/test_window.py b/pandas/tests/window/test_window.py index cc29ab4f2cd62..41b9d9e84f27e 100644 --- a/pandas/tests/window/test_window.py +++ b/pandas/tests/window/test_window.py @@ -29,14 +29,15 @@ def test_constructor(self, which): c(win_type="boxcar", window=2, min_periods=1, center=False) # not valid + msg = "|".join(["min_periods must be an integer", "center must be a boolean"]) for w in [2.0, "foo", np.array([2])]: - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): c(win_type="boxcar", window=2, min_periods=w) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): c(win_type="boxcar", window=2, min_periods=1, center=w) for wt in 
["foobar", 1]: - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="Invalid win_type"): c(win_type=wt, window=2) @td.skip_if_no_scipy From 0edd2d9d0b64a94bb90a165c7bd75a191c7fab49 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 23 Feb 2020 07:23:39 -0800 Subject: [PATCH 122/388] annotations (#32193) --- pandas/core/internals/managers.py | 35 +++++++++++++++---------------- pandas/core/ops/__init__.py | 16 ++++---------- pandas/core/ops/array_ops.py | 17 ++++++--------- 3 files changed, 27 insertions(+), 41 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 69ceb95985140..d4f9c15a9f73f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -8,6 +8,7 @@ import numpy as np from pandas._libs import Timedelta, Timestamp, internals as libinternals, lib +from pandas._typing import DtypeObj from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -847,7 +848,7 @@ def to_dict(self, copy: bool = True): return {dtype: self.combine(blocks, copy=copy) for dtype, blocks in bd.items()} - def fast_xs(self, loc): + def fast_xs(self, loc: int): """ get a cross sectional for a given location in the items ; handle dups @@ -883,12 +884,12 @@ def fast_xs(self, loc): for i, rl in enumerate(blk.mgr_locs): result[rl] = blk.iget((i, loc)) - if is_extension_array_dtype(dtype): + if isinstance(dtype, ExtensionDtype): result = dtype.construct_array_type()._from_sequence(result, dtype=dtype) return result - def consolidate(self): + def consolidate(self) -> "BlockManager": """ Join together blocks having same dtype @@ -940,7 +941,7 @@ def get(self, item): new_axis=self.items[indexer], indexer=indexer, axis=0, allow_dups=True ) - def iget(self, i): + def iget(self, i: int) -> "SingleBlockManager": """ Return the data as a SingleBlockManager. """ @@ -1377,7 +1378,7 @@ def canonicalize(block): block.equals(oblock) for block, oblock in zip(self_blocks, other_blocks) ) - def unstack(self, unstacker_func, fill_value): + def unstack(self, unstacker_func, fill_value) -> "BlockManager": """ Return a BlockManager with all blocks unstacked.. 
@@ -1396,8 +1397,8 @@ def unstack(self, unstacker_func, fill_value): dummy = unstacker_func(np.empty((0, 0)), value_columns=self.items) new_columns = dummy.get_new_columns() new_index = dummy.get_new_index() - new_blocks = [] - columns_mask = [] + new_blocks: List[Block] = [] + columns_mask: List[np.ndarray] = [] for blk in self.blocks: blocks, mask = blk._unstack( @@ -1478,7 +1479,7 @@ def _post_setstate(self): pass @property - def _block(self): + def _block(self) -> Block: return self.blocks[0] @property @@ -1495,14 +1496,14 @@ def _blklocs(self): """ compat with BlockManager """ return None - def get_slice(self, slobj, axis=0): + def get_slice(self, slobj: slice, axis: int = 0) -> "SingleBlockManager": if axis >= self.ndim: raise IndexError("Requested axis not found in manager") - return type(self)(self._block._slice(slobj), self.index[slobj], fastpath=True,) + return type(self)(self._block._slice(slobj), self.index[slobj], fastpath=True) @property - def index(self): + def index(self) -> Index: return self.axes[0] @property @@ -1516,7 +1517,7 @@ def array_dtype(self): def get_dtype_counts(self): return {self.dtype.name: 1} - def get_dtypes(self): + def get_dtypes(self) -> np.ndarray: return np.array([self._block.dtype]) def external_values(self): @@ -1527,7 +1528,7 @@ def internal_values(self): """The array that Series._values returns""" return self._block.internal_values() - def get_values(self): + def get_values(self) -> np.ndarray: """ return a dense type view """ return np.array(self._block.to_dense(), copy=False) @@ -1535,7 +1536,7 @@ def get_values(self): def _can_hold_na(self) -> bool: return self._block._can_hold_na - def is_consolidated(self): + def is_consolidated(self) -> bool: return True def _consolidate_check(self): @@ -1813,9 +1814,7 @@ def _shape_compat(x): return stacked, placement -def _interleaved_dtype( - blocks: List[Block], -) -> Optional[Union[np.dtype, ExtensionDtype]]: +def _interleaved_dtype(blocks: Sequence[Block]) -> Optional[DtypeObj]: """ Find the common dtype for `blocks`. @@ -1825,7 +1824,7 @@ def _interleaved_dtype( Returns ------- - dtype : Optional[Union[np.dtype, ExtensionDtype]] + dtype : np.dtype, ExtensionDtype, or None None is returned when `blocks` is empty. 
""" if not len(blocks): diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index b74dea686a89f..1ef3889703341 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -5,22 +5,17 @@ """ import datetime import operator -from typing import TYPE_CHECKING, Optional, Set, Tuple, Union +from typing import TYPE_CHECKING, Optional, Set, Tuple import numpy as np from pandas._libs import Timedelta, Timestamp, lib from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op # noqa:F401 -from pandas._typing import Level +from pandas._typing import ArrayLike, Level from pandas.util._decorators import Appender from pandas.core.dtypes.common import is_list_like, is_timedelta64_dtype -from pandas.core.dtypes.generic import ( - ABCDataFrame, - ABCExtensionArray, - ABCIndexClass, - ABCSeries, -) +from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna from pandas.core.construction import extract_array @@ -451,10 +446,7 @@ def _align_method_SERIES(left, right, align_asobject=False): def _construct_result( - left: ABCSeries, - result: Union[np.ndarray, ABCExtensionArray], - index: ABCIndexClass, - name, + left: ABCSeries, result: ArrayLike, index: ABCIndexClass, name, ): """ Construct an appropriately-labelled Series from the result of an op. diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 37a4a6eddaebe..10e3f32de3958 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -4,11 +4,12 @@ """ from functools import partial import operator -from typing import Any, Optional, Union +from typing import Any, Optional import numpy as np from pandas._libs import Timedelta, Timestamp, lib, ops as libops +from pandas._typing import ArrayLike from pandas.core.dtypes.cast import ( construct_1d_object_array_from_listlike, @@ -155,9 +156,7 @@ def na_arithmetic_op(left, right, op, str_rep: str): return missing.dispatch_fill_zeros(op, left, right, result) -def arithmetic_op( - left: Union[np.ndarray, ABCExtensionArray], right: Any, op, str_rep: str -): +def arithmetic_op(left: ArrayLike, right: Any, op, str_rep: str): """ Evaluate an arithmetic operation `+`, `-`, `*`, `/`, `//`, `%`, `**`, ... @@ -200,9 +199,7 @@ def arithmetic_op( return res_values -def comparison_op( - left: Union[np.ndarray, ABCExtensionArray], right: Any, op -) -> Union[np.ndarray, ABCExtensionArray]: +def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike: """ Evaluate a comparison operation `=`, `!=`, `>=`, `>`, `<=`, or `<`. @@ -215,7 +212,7 @@ def comparison_op( Returns ------- - ndarrray or ExtensionArray + ndarray or ExtensionArray """ # NB: We assume extract_array has already been called on left and right lvalues = left @@ -302,9 +299,7 @@ def na_logical_op(x: np.ndarray, y, op): return result.reshape(x.shape) -def logical_op( - left: Union[np.ndarray, ABCExtensionArray], right: Any, op -) -> Union[np.ndarray, ABCExtensionArray]: +def logical_op(left: ArrayLike, right: Any, op) -> ArrayLike: """ Evaluate a logical operation `|`, `&`, or `^`. 
From 034fab5bd975758539d039953032ddfe0533553b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 23 Feb 2020 07:50:37 -0800 Subject: [PATCH 123/388] BUG: Index(categorical, dtype=object) not returning object dtype (#32167) --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/core/indexes/base.py | 2 +- pandas/tests/indexes/test_index_new.py | 8 +++++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 7449c62a5ad31..a4c991dcc166c 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -103,7 +103,7 @@ Bug fixes Categorical ^^^^^^^^^^^ - +- Bug when passing categorical data to :class:`Index` constructor along with ``dtype=object`` incorrectly returning a :class:`CategoricalIndex` instead of object-dtype :class:`Index` (:issue:`32167`) - - diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c896e68f7a188..53c4dfde2775b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -304,7 +304,7 @@ def __new__( # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas.core.indexes.category import CategoricalIndex - return CategoricalIndex(data, dtype=dtype, copy=copy, name=name, **kwargs) + return _maybe_asobject(dtype, CategoricalIndex, data, copy, name, **kwargs) # interval elif is_interval_dtype(data) or is_interval_dtype(dtype): diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py index e150df971da2d..33f61de6a4ebf 100644 --- a/pandas/tests/indexes/test_index_new.py +++ b/pandas/tests/indexes/test_index_new.py @@ -6,7 +6,7 @@ from pandas.core.dtypes.common import is_unsigned_integer_dtype -from pandas import Index, Int64Index, MultiIndex, UInt64Index +from pandas import CategoricalIndex, Index, Int64Index, MultiIndex, UInt64Index import pandas._testing as tm @@ -47,3 +47,9 @@ def test_constructor_dtypes_to_object(self, cast_index, vals): assert type(index) is Index assert index.dtype == object + + def test_constructor_categorical_to_object(self): + # GH#32167 Categorical data and dtype=object should return object-dtype + ci = CategoricalIndex(range(5)) + result = Index(ci, dtype=object) + assert not isinstance(result, CategoricalIndex) From 58df0ac699a7b60ac5724da689c930e5b58134e6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 23 Feb 2020 07:52:04 -0800 Subject: [PATCH 124/388] CLN: indexing comments and cleanups (#32082) --- pandas/core/indexing.py | 44 +++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 28 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1644b4203052b..5adc65b488399 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -732,14 +732,15 @@ def _getitem_lowerdim(self, tup: Tuple): raise IndexingError("Too many indexers. handle elsewhere") for i, key in enumerate(tup): - if is_label_like(key) or isinstance(key, tuple): + if is_label_like(key): + # We don't need to check for tuples here because those are + # caught by the _is_nested_tuple_indexer check above. section = self._getitem_axis(key, axis=i) - # we have yielded a scalar ? - if not is_list_like_indexer(section): - return section - - elif section.ndim == self.ndim: + # We should never have a scalar section here, because + # _getitem_lowerdim is only called after a check for + # is_scalar_access, which that would be. 
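+ # For example, partially indexing a MultiIndexed frame with
+ # df.loc["a", "x"] can yield a section that is itself a DataFrame
+ # (same ndim), which is the case handled below.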
+ if section.ndim == self.ndim: # we're in the middle of slicing through a MultiIndex # revise the key wrt to `section` by inserting an _NS new_key = tup[:i] + (_NS,) + tup[i + 1 :] @@ -757,7 +758,7 @@ def _getitem_lowerdim(self, tup: Tuple): # slice returns a new object. if com.is_null_slice(new_key): return section - # This is an elided recursive call to iloc/loc/etc' + # This is an elided recursive call to iloc/loc return getattr(section, self.name)[new_key] raise IndexingError("not applicable") @@ -1013,15 +1014,7 @@ def _getitem_tuple(self, tup: Tuple): return self._getitem_tuple_same_dim(tup) def _get_label(self, label, axis: int): - if self.ndim == 1: - # for perf reasons we want to try _xs first - # as its basically direct indexing - # but will fail when the index is not present - # see GH5667 - return self.obj._xs(label, axis=axis) - elif isinstance(label, tuple) and isinstance(label[axis], slice): - raise IndexingError("no slices here, handle elsewhere") - + # GH#5667 this will fail if the label is not present in the axis. return self.obj._xs(label, axis=axis) def _handle_lowerdim_multi_index_axis0(self, tup: Tuple): @@ -1298,7 +1291,7 @@ def _validate_read_indexer( # We (temporarily) allow for some missing keys with .loc, except in # some cases (e.g. setting) in which "raise_missing" will be False - if not (self.name == "loc" and not raise_missing): + if raise_missing: not_found = list(set(key) - set(ax)) raise KeyError(f"{not_found} not in index") @@ -1363,10 +1356,7 @@ def _validate_key(self, key, axis: int): else: raise ValueError(f"Can only index by location with a [{self._valid_types}]") - def _has_valid_setitem_indexer(self, indexer): - self._has_valid_positional_setitem_indexer(indexer) - - def _has_valid_positional_setitem_indexer(self, indexer) -> bool: + def _has_valid_setitem_indexer(self, indexer) -> bool: """ Validate that a positional indexer cannot enlarge its target will raise if needed, does not modify the indexer externally. 
@@ -1376,7 +1366,7 @@ def _has_valid_positional_setitem_indexer(self, indexer) -> bool: bool """ if isinstance(indexer, dict): - raise IndexError(f"{self.name} cannot enlarge its target object") + raise IndexError("iloc cannot enlarge its target object") else: if not isinstance(indexer, tuple): indexer = _tuplify(self.ndim, indexer) @@ -1389,11 +1379,9 @@ def _has_valid_positional_setitem_indexer(self, indexer) -> bool: pass elif is_integer(i): if i >= len(ax): - raise IndexError( - f"{self.name} cannot enlarge its target object" - ) + raise IndexError("iloc cannot enlarge its target object") elif isinstance(i, dict): - raise IndexError(f"{self.name} cannot enlarge its target object") + raise IndexError("iloc cannot enlarge its target object") return True @@ -1520,8 +1508,8 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): return key elif is_float(key): + # _validate_indexer call will always raise labels._validate_indexer("positional", key, "iloc") - return key self._validate_key(key, axis) return key @@ -1582,7 +1570,7 @@ def _setitem_with_indexer(self, indexer, value): # this correctly sets the dtype and avoids cache issues # essentially this separates out the block that is needed # to possibly be modified - if self.ndim > 1 and i == self.obj._info_axis_number: + if self.ndim > 1 and i == info_axis: # add the new item, and set the value # must have all defined axes if we have a scalar From d4293f0d232590c61d6deb517c0eea42035d1df3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 23 Feb 2020 07:54:29 -0800 Subject: [PATCH 125/388] BUG: catch almost-null-slice in _convert_slice_indexer (#31866) --- pandas/core/indexes/base.py | 5 ++--- pandas/tests/indexes/test_base.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 53c4dfde2775b..aa22527d8c2d7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3146,8 +3146,7 @@ def _convert_slice_indexer(self, key: slice, kind: str_t): def is_int(v): return v is None or is_integer(v) - is_null_slicer = start is None and stop is None - is_index_slice = is_int(start) and is_int(stop) + is_index_slice = is_int(start) and is_int(stop) and is_int(step) is_positional = is_index_slice and not ( self.is_integer() or self.is_categorical() ) @@ -3177,7 +3176,7 @@ def is_int(v): except KeyError: pass - if is_null_slicer: + if com.is_null_slice(key): indexer = key elif is_positional: indexer = key diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 6327f1b03589b..22f6af2af4aed 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2611,3 +2611,18 @@ def test_validate_1d_input(): ser = pd.Series(0, range(4)) with pytest.raises(ValueError, match=msg): ser.index = np.array([[2, 3]] * 4) + + +def test_convert_almost_null_slice(indices): + # slice with None at both ends, but not step + idx = indices + + key = slice(None, None, "foo") + + if isinstance(idx, pd.IntervalIndex): + with pytest.raises(ValueError, match="cannot support not-default step"): + idx._convert_slice_indexer(key, "loc") + else: + msg = "'>=' not supported between instances of 'str' and 'int'" + with pytest.raises(TypeError, match=msg): + idx._convert_slice_indexer(key, "loc") From 9a02c3503f8c90675feb74d817ef28faed5098ea Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 23 Feb 2020 07:56:30 -0800 Subject: [PATCH 126/388] TST: parametrize and de-duplicate timedelta64 
arithmetic tests (#32091) --- pandas/tests/arithmetic/test_timedelta64.py | 212 +++++++------------- 1 file changed, 71 insertions(+), 141 deletions(-) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index abdeb1b30b626..300e468c34e65 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -25,6 +25,19 @@ get_upcast_box, ) + +def assert_dtype(obj, expected_dtype): + """ + Helper to check the dtype for a Series, Index, or single-column DataFrame. + """ + if isinstance(obj, DataFrame): + dtype = obj.dtypes.iat[0] + else: + dtype = obj.dtype + + assert dtype == expected_dtype + + # ------------------------------------------------------------------ # Timedelta64[ns] dtype Comparisons @@ -522,19 +535,35 @@ def test_tda_add_sub_index(self): # ------------------------------------------------------------- # Binary operations TimedeltaIndex and timedelta-like - def test_tdi_iadd_timedeltalike(self, two_hours): + def test_tdi_iadd_timedeltalike(self, two_hours, box_with_array): # only test adding/sub offsets as + is now numeric rng = timedelta_range("1 days", "10 days") expected = timedelta_range("1 days 02:00:00", "10 days 02:00:00", freq="D") + + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + orig_rng = rng rng += two_hours - tm.assert_index_equal(rng, expected) + tm.assert_equal(rng, expected) + if box_with_array is not pd.Index: + # Check that operation is actually inplace + tm.assert_equal(orig_rng, expected) - def test_tdi_isub_timedeltalike(self, two_hours): + def test_tdi_isub_timedeltalike(self, two_hours, box_with_array): # only test adding/sub offsets as - is now numeric rng = timedelta_range("1 days", "10 days") expected = timedelta_range("0 days 22:00:00", "9 days 22:00:00") + + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + orig_rng = rng rng -= two_hours - tm.assert_index_equal(rng, expected) + tm.assert_equal(rng, expected) + if box_with_array is not pd.Index: + # Check that operation is actually inplace + tm.assert_equal(orig_rng, expected) # ------------------------------------------------------------- @@ -1013,15 +1042,6 @@ def test_td64arr_add_datetime64_nat(self, box_with_array): # ------------------------------------------------------------------ # Invalid __add__/__sub__ operations - # TODO: moved from frame tests; needs parametrization/de-duplication - def test_td64_df_add_int_frame(self): - # GH#22696 Check that we don't dispatch to numpy implementation, - # which treats int64 as m8[ns] - tdi = pd.timedelta_range("1", periods=3) - df = tdi.to_frame() - other = pd.DataFrame([1, 2, 3], index=tdi) # indexed like `df` - assert_invalid_addsub_type(df, other) - @pytest.mark.parametrize("pi_freq", ["D", "W", "Q", "H"]) @pytest.mark.parametrize("tdi_freq", [None, "H"]) def test_td64arr_sub_periodlike(self, box_with_array, tdi_freq, pi_freq): @@ -1100,6 +1120,9 @@ def test_td64arr_add_sub_int(self, box_with_array, one): def test_td64arr_add_sub_integer_array(self, box_with_array): # GH#19959, deprecated GH#22535 + # GH#22696 for DataFrame case, check that we don't dispatch to numpy + # implementation, which treats int64 as m8[ns] + rng = timedelta_range("1 days 09:00:00", freq="H", periods=3) tdarr = tm.box_expected(rng, box_with_array) other = tm.box_expected([4, 3, 2], box_with_array) @@ -1119,60 +1142,6 @@ def test_td64arr_addsub_integer_array_no_freq(self, 
box_with_array): # ------------------------------------------------------------------ # Operations with timedelta-like others - # TODO: this was taken from tests.series.test_ops; de-duplicate - def test_operators_timedelta64_with_timedelta(self, scalar_td): - # smoke tests - td1 = Series([timedelta(minutes=5, seconds=3)] * 3) - td1.iloc[2] = np.nan - - td1 + scalar_td - scalar_td + td1 - td1 - scalar_td - scalar_td - td1 - td1 / scalar_td - scalar_td / td1 - - # TODO: this was taken from tests.series.test_ops; de-duplicate - def test_timedelta64_operations_with_timedeltas(self): - # td operate with td - td1 = Series([timedelta(minutes=5, seconds=3)] * 3) - td2 = timedelta(minutes=5, seconds=4) - result = td1 - td2 - expected = Series([timedelta(seconds=0)] * 3) - Series( - [timedelta(seconds=1)] * 3 - ) - assert result.dtype == "m8[ns]" - tm.assert_series_equal(result, expected) - - result2 = td2 - td1 - expected = Series([timedelta(seconds=1)] * 3) - Series( - [timedelta(seconds=0)] * 3 - ) - tm.assert_series_equal(result2, expected) - - # roundtrip - tm.assert_series_equal(result + td2, td1) - - # Now again, using pd.to_timedelta, which should build - # a Series or a scalar, depending on input. - td1 = Series(pd.to_timedelta(["00:05:03"] * 3)) - td2 = pd.to_timedelta("00:05:04") - result = td1 - td2 - expected = Series([timedelta(seconds=0)] * 3) - Series( - [timedelta(seconds=1)] * 3 - ) - assert result.dtype == "m8[ns]" - tm.assert_series_equal(result, expected) - - result2 = td2 - td1 - expected = Series([timedelta(seconds=1)] * 3) - Series( - [timedelta(seconds=0)] * 3 - ) - tm.assert_series_equal(result2, expected) - - # roundtrip - tm.assert_series_equal(result + td2, td1) - def test_td64arr_add_td64_array(self, box_with_array): box = box_with_array dti = pd.date_range("2016-01-01", periods=3) @@ -1203,7 +1172,6 @@ def test_td64arr_sub_td64_array(self, box_with_array): result = tdarr - tdi tm.assert_equal(result, expected) - # TODO: parametrize over [add, sub, radd, rsub]? 
@pytest.mark.parametrize( "names", [ @@ -1232,17 +1200,11 @@ def test_td64arr_add_sub_tdi(self, box, names): result = tdi + ser tm.assert_equal(result, expected) - if box is not pd.DataFrame: - assert result.dtype == "timedelta64[ns]" - else: - assert result.dtypes[0] == "timedelta64[ns]" + assert_dtype(result, "timedelta64[ns]") result = ser + tdi tm.assert_equal(result, expected) - if box is not pd.DataFrame: - assert result.dtype == "timedelta64[ns]" - else: - assert result.dtypes[0] == "timedelta64[ns]" + assert_dtype(result, "timedelta64[ns]") expected = Series( [Timedelta(hours=-3), Timedelta(days=1, hours=-4)], name=names[2] @@ -1251,17 +1213,11 @@ def test_td64arr_add_sub_tdi(self, box, names): result = tdi - ser tm.assert_equal(result, expected) - if box is not pd.DataFrame: - assert result.dtype == "timedelta64[ns]" - else: - assert result.dtypes[0] == "timedelta64[ns]" + assert_dtype(result, "timedelta64[ns]") result = ser - tdi tm.assert_equal(result, -expected) - if box is not pd.DataFrame: - assert result.dtype == "timedelta64[ns]" - else: - assert result.dtypes[0] == "timedelta64[ns]" + assert_dtype(result, "timedelta64[ns]") def test_td64arr_add_sub_td64_nat(self, box_with_array): # GH#23320 special handling for timedelta64("NaT") @@ -1296,6 +1252,7 @@ def test_td64arr_sub_NaT(self, box_with_array): def test_td64arr_add_timedeltalike(self, two_hours, box_with_array): # only test adding/sub offsets as + is now numeric + # GH#10699 for Tick cases box = box_with_array rng = timedelta_range("1 days", "10 days") expected = timedelta_range("1 days 02:00:00", "10 days 02:00:00", freq="D") @@ -1305,8 +1262,12 @@ def test_td64arr_add_timedeltalike(self, two_hours, box_with_array): result = rng + two_hours tm.assert_equal(result, expected) + result = two_hours + rng + tm.assert_equal(result, expected) + def test_td64arr_sub_timedeltalike(self, two_hours, box_with_array): # only test adding/sub offsets as - is now numeric + # GH#10699 for Tick cases box = box_with_array rng = timedelta_range("1 days", "10 days") expected = timedelta_range("0 days 22:00:00", "9 days 22:00:00") @@ -1317,46 +1278,12 @@ def test_td64arr_sub_timedeltalike(self, two_hours, box_with_array): result = rng - two_hours tm.assert_equal(result, expected) + result = two_hours - rng + tm.assert_equal(result, -expected) + # ------------------------------------------------------------------ # __add__/__sub__ with DateOffsets and arrays of DateOffsets - # TODO: this was taken from tests.series.test_operators; de-duplicate - def test_timedelta64_operations_with_DateOffset(self): - # GH#10699 - td = Series([timedelta(minutes=5, seconds=3)] * 3) - result = td + pd.offsets.Minute(1) - expected = Series([timedelta(minutes=6, seconds=3)] * 3) - tm.assert_series_equal(result, expected) - - result = td - pd.offsets.Minute(1) - expected = Series([timedelta(minutes=4, seconds=3)] * 3) - tm.assert_series_equal(result, expected) - - with tm.assert_produces_warning(PerformanceWarning): - result = td + Series( - [pd.offsets.Minute(1), pd.offsets.Second(3), pd.offsets.Hour(2)] - ) - expected = Series( - [ - timedelta(minutes=6, seconds=3), - timedelta(minutes=5, seconds=6), - timedelta(hours=2, minutes=5, seconds=3), - ] - ) - tm.assert_series_equal(result, expected) - - result = td + pd.offsets.Minute(1) + pd.offsets.Second(12) - expected = Series([timedelta(minutes=6, seconds=15)] * 3) - tm.assert_series_equal(result, expected) - - # valid DateOffsets - for do in ["Hour", "Minute", "Second", "Day", "Micro", "Milli", "Nano"]: - op = 
getattr(pd.offsets, do) - td + op(5) - op(5) + td - td - op(5) - op(5) - td - @pytest.mark.parametrize( "names", [(None, None, None), ("foo", "bar", None), ("foo", "foo", "foo")] ) @@ -1561,26 +1488,6 @@ class TestTimedeltaArraylikeMulDivOps: # Tests for timedelta64[ns] # __mul__, __rmul__, __div__, __rdiv__, __floordiv__, __rfloordiv__ - # TODO: Moved from tests.series.test_operators; needs cleanup - @pytest.mark.parametrize("m", [1, 3, 10]) - @pytest.mark.parametrize("unit", ["D", "h", "m", "s", "ms", "us", "ns"]) - def test_timedelta64_conversions(self, m, unit): - startdate = Series(pd.date_range("2013-01-01", "2013-01-03")) - enddate = Series(pd.date_range("2013-03-01", "2013-03-03")) - - ser = enddate - startdate - ser[2] = np.nan - - # op - expected = Series([x / np.timedelta64(m, unit) for x in ser]) - result = ser / np.timedelta64(m, unit) - tm.assert_series_equal(result, expected) - - # reverse op - expected = Series([Timedelta(np.timedelta64(m, unit)) / x for x in ser]) - result = np.timedelta64(m, unit) / ser - tm.assert_series_equal(result, expected) - # ------------------------------------------------------------------ # Multiplication # organized with scalar others first, then array-like @@ -1734,6 +1641,29 @@ def test_td64arr_div_tdlike_scalar(self, two_hours, box_with_array): expected = 1 / expected tm.assert_equal(result, expected) + @pytest.mark.parametrize("m", [1, 3, 10]) + @pytest.mark.parametrize("unit", ["D", "h", "m", "s", "ms", "us", "ns"]) + def test_td64arr_div_td64_scalar(self, m, unit, box_with_array): + startdate = Series(pd.date_range("2013-01-01", "2013-01-03")) + enddate = Series(pd.date_range("2013-03-01", "2013-03-03")) + + ser = enddate - startdate + ser[2] = np.nan + flat = ser + ser = tm.box_expected(ser, box_with_array) + + # op + expected = Series([x / np.timedelta64(m, unit) for x in flat]) + expected = tm.box_expected(expected, box_with_array) + result = ser / np.timedelta64(m, unit) + tm.assert_equal(result, expected) + + # reverse op + expected = Series([Timedelta(np.timedelta64(m, unit)) / x for x in flat]) + expected = tm.box_expected(expected, box_with_array) + result = np.timedelta64(m, unit) / ser + tm.assert_equal(result, expected) + def test_td64arr_div_tdlike_scalar_with_nat(self, two_hours, box_with_array): rng = TimedeltaIndex(["1 days", pd.NaT, "2 days"], name="foo") expected = pd.Float64Index([12, np.nan, 24], name="foo") From df49f5355a5cd6c4fe10494f4c1ac8c9364d648c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 23 Feb 2020 08:18:49 -0800 Subject: [PATCH 127/388] REF: pass str_rep through arithmetic ops more consistently (#31297) --- pandas/core/ops/__init__.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 1ef3889703341..1ee4c8e85be6b 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -828,7 +828,7 @@ def f(self, other, axis=default_axis, level=None): return _combine_series_frame(self, other, op, axis=axis) else: # in this case we always have `np.ndim(other) == 0` - new_data = dispatch_to_series(self, other, op) + new_data = dispatch_to_series(self, other, op, str_rep) return self._construct_result(new_data) f.__name__ = op_name @@ -852,13 +852,15 @@ def f(self, other): new_data = dispatch_to_series(self, other, op, str_rep) elif isinstance(other, ABCSeries): - new_data = dispatch_to_series(self, other, op, axis="columns") + new_data = dispatch_to_series( + self, other, op, str_rep=str_rep, axis="columns" + 
) else: # straight boolean comparisons we want to allow all columns # (regardless of dtype to pass thru) See #4537 for discussion. - new_data = dispatch_to_series(self, other, op) + new_data = dispatch_to_series(self, other, op, str_rep) return self._construct_result(new_data) From aa6f241f5ef8d46de9fab077fee4bde60fe039c7 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Sun, 23 Feb 2020 18:06:02 +0100 Subject: [PATCH 128/388] Use fixtures in pandas/tests/base (#32046) --- pandas/conftest.py | 19 +- pandas/tests/base/test_ops.py | 401 +++++++++++++++++---------------- pandas/tests/indexes/common.py | 8 +- 3 files changed, 228 insertions(+), 200 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index f7c6a0c899642..be44e6c2b36da 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -986,7 +986,7 @@ def _gen_mi(): "uint": tm.makeUIntIndex(100), "range": tm.makeRangeIndex(100), "float": tm.makeFloatIndex(100), - "bool": tm.makeBoolIndex(2), + "bool": tm.makeBoolIndex(10), "categorical": tm.makeCategoricalIndex(100), "interval": tm.makeIntervalIndex(100), "empty": Index([]), @@ -998,6 +998,15 @@ def _gen_mi(): @pytest.fixture(params=indices_dict.keys()) def indices(request): + """ + Fixture for many "simple" kinds of indices. + + These indices are unlikely to cover corner cases, e.g. + - no names + - no NaTs/NaNs + - no values near implementation bounds + - ... + """ # copy to avoid mutation, e.g. setting .name return indices_dict[request.param].copy() @@ -1015,6 +1024,14 @@ def _create_series(index): } +@pytest.fixture +def series_with_simple_index(indices): + """ + Fixture for tests on series with changing types of indices. + """ + return _create_series(indices) + + _narrow_dtypes = [ np.float16, np.float32, diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index 9deb56f070d56..625d559001e72 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -137,227 +137,238 @@ def setup_method(self, method): self.is_valid_objs = self.objs self.not_valid_objs = [] - def test_none_comparison(self): + def test_none_comparison(self, series_with_simple_index): + series = series_with_simple_index + if isinstance(series.index, IntervalIndex): + # IntervalIndex breaks on "series[0] = np.nan" below + pytest.skip("IntervalIndex doesn't support assignment") + if len(series) < 1: + pytest.skip("Test doesn't make sense on empty data") # bug brought up by #1079 # changed from TypeError in 0.17.0 - for o in self.is_valid_objs: - if isinstance(o, Series): + series[0] = np.nan + + # noinspection PyComparisonWithNone + result = series == None # noqa + assert not result.iat[0] + assert not result.iat[1] + + # noinspection PyComparisonWithNone + result = series != None # noqa + assert result.iat[0] + assert result.iat[1] + + result = None == series # noqa + assert not result.iat[0] + assert not result.iat[1] + + result = None != series # noqa + assert result.iat[0] + assert result.iat[1] + + if is_datetime64_dtype(series) or is_datetime64tz_dtype(series): + # Following DatetimeIndex (and Timestamp) convention, + # inequality comparisons with Series[datetime64] raise + msg = "Invalid comparison" + with pytest.raises(TypeError, match=msg): + None > series + with pytest.raises(TypeError, match=msg): + series > None + else: + result = None > series + assert not result.iat[0] + assert not result.iat[1] - o[0] = np.nan - - # noinspection PyComparisonWithNone - result = o == None # noqa - assert not result.iat[0] - assert not result.iat[1] - - # 
noinspection PyComparisonWithNone - result = o != None # noqa - assert result.iat[0] - assert result.iat[1] - - result = None == o # noqa - assert not result.iat[0] - assert not result.iat[1] - - result = None != o # noqa - assert result.iat[0] - assert result.iat[1] - - if is_datetime64_dtype(o) or is_datetime64tz_dtype(o): - # Following DatetimeIndex (and Timestamp) convention, - # inequality comparisons with Series[datetime64] raise - msg = "Invalid comparison" - with pytest.raises(TypeError, match=msg): - None > o - with pytest.raises(TypeError, match=msg): - o > None - else: - result = None > o - assert not result.iat[0] - assert not result.iat[1] + result = series < None + assert not result.iat[0] + assert not result.iat[1] - result = o < None - assert not result.iat[0] - assert not result.iat[1] + def test_ndarray_compat_properties(self, index_or_series_obj): + obj = index_or_series_obj - def test_ndarray_compat_properties(self): + # Check that we work. + for p in ["shape", "dtype", "T", "nbytes"]: + assert getattr(obj, p, None) is not None - for o in self.objs: - # Check that we work. - for p in ["shape", "dtype", "T", "nbytes"]: - assert getattr(o, p, None) is not None + # deprecated properties + for p in ["flags", "strides", "itemsize", "base", "data"]: + assert not hasattr(obj, p) - # deprecated properties - for p in ["flags", "strides", "itemsize", "base", "data"]: - assert not hasattr(o, p) + msg = "can only convert an array of size 1 to a Python scalar" + with pytest.raises(ValueError, match=msg): + obj.item() # len > 1 - msg = "can only convert an array of size 1 to a Python scalar" - with pytest.raises(ValueError, match=msg): - o.item() # len > 1 - - assert o.ndim == 1 - assert o.size == len(o) + assert obj.ndim == 1 + assert obj.size == len(obj) assert Index([1]).item() == 1 assert Series([1]).item() == 1 - def test_value_counts_unique_nunique(self): - for orig in self.objs: - o = orig.copy() - klass = type(o) - values = o._values - - if isinstance(values, Index): - # reset name not to affect latter process - values.name = None - - # create repeated values, 'n'th element is repeated by n+1 times - # skip boolean, because it only has 2 values at most - if isinstance(o, Index) and o.is_boolean(): - continue - elif isinstance(o, Index): - expected_index = Index(o[::-1]) - expected_index.name = None - o = o.repeat(range(1, len(o) + 1)) - o.name = "a" - else: - expected_index = Index(values[::-1]) - idx = o.index.repeat(range(1, len(o) + 1)) - # take-based repeat - indices = np.repeat(np.arange(len(o)), range(1, len(o) + 1)) - rep = values.take(indices) - o = klass(rep, index=idx, name="a") - - # check values has the same dtype as the original - assert o.dtype == orig.dtype - - expected_s = Series( - range(10, 0, -1), index=expected_index, dtype="int64", name="a" + def test_value_counts_unique_nunique(self, index_or_series_obj): + orig = index_or_series_obj + obj = orig.copy() + klass = type(obj) + values = obj._values + + if orig.duplicated().any(): + pytest.xfail( + "The test implementation isn't flexible enough to deal" + " with duplicated values. This isn't a bug in the" + " application code, but in the test code." 
) - result = o.value_counts() - tm.assert_series_equal(result, expected_s) - assert result.index.name is None - assert result.name == "a" + # create repeated values, 'n'th element is repeated by n+1 times + if isinstance(obj, Index): + expected_index = Index(obj[::-1]) + expected_index.name = None + obj = obj.repeat(range(1, len(obj) + 1)) + else: + expected_index = Index(values[::-1]) + idx = obj.index.repeat(range(1, len(obj) + 1)) + # take-based repeat + indices = np.repeat(np.arange(len(obj)), range(1, len(obj) + 1)) + rep = values.take(indices) + obj = klass(rep, index=idx) + + # check values has the same dtype as the original + assert obj.dtype == orig.dtype + + expected_s = Series( + range(len(orig), 0, -1), index=expected_index, dtype="int64" + ) - result = o.unique() - if isinstance(o, Index): - assert isinstance(result, type(o)) - tm.assert_index_equal(result, orig) - assert result.dtype == orig.dtype - elif is_datetime64tz_dtype(o): - # datetimetz Series returns array of Timestamp - assert result[0] == orig[0] - for r in result: - assert isinstance(r, Timestamp) - - tm.assert_numpy_array_equal( - result.astype(object), orig._values.astype(object) - ) - else: - tm.assert_numpy_array_equal(result, orig.values) - assert result.dtype == orig.dtype + result = obj.value_counts() + tm.assert_series_equal(result, expected_s) + assert result.index.name is None + + result = obj.unique() + if isinstance(obj, Index): + assert isinstance(result, type(obj)) + tm.assert_index_equal(result, orig) + assert result.dtype == orig.dtype + elif is_datetime64tz_dtype(obj): + # datetimetz Series returns array of Timestamp + assert result[0] == orig[0] + for r in result: + assert isinstance(r, Timestamp) + + tm.assert_numpy_array_equal( + result.astype(object), orig._values.astype(object) + ) + else: + tm.assert_numpy_array_equal(result, orig.values) + assert result.dtype == orig.dtype - assert o.nunique() == len(np.unique(o.values)) + # dropna=True would break for MultiIndex + assert obj.nunique(dropna=False) == len(np.unique(obj.values)) @pytest.mark.parametrize("null_obj", [np.nan, None]) - def test_value_counts_unique_nunique_null(self, null_obj): - - for orig in self.objs: - o = orig.copy() - klass = type(o) - values = o._ndarray_values - - if not allow_na_ops(o): - continue - - # special assign to the numpy array - if is_datetime64tz_dtype(o): - if isinstance(o, DatetimeIndex): - v = o.asi8 - v[0:2] = iNaT - values = o._shallow_copy(v) - else: - o = o.copy() - o[0:2] = pd.NaT - values = o._values - - elif needs_i8_conversion(o): - values[0:2] = iNaT - values = o._shallow_copy(values) + def test_value_counts_unique_nunique_null(self, null_obj, index_or_series_obj): + orig = index_or_series_obj + obj = orig.copy() + klass = type(obj) + values = obj._ndarray_values + num_values = len(orig) + + if not allow_na_ops(obj): + pytest.skip("type doesn't allow for NA operations") + elif isinstance(orig, (pd.CategoricalIndex, pd.IntervalIndex)): + pytest.skip(f"values of {klass} cannot be changed") + elif isinstance(orig, pd.MultiIndex): + pytest.skip("MultiIndex doesn't support isna") + + # special assign to the numpy array + if is_datetime64tz_dtype(obj): + if isinstance(obj, DatetimeIndex): + v = obj.asi8 + v[0:2] = iNaT + values = obj._shallow_copy(v) else: - values[0:2] = null_obj - # check values has the same dtype as the original - - assert values.dtype == o.dtype + obj = obj.copy() + obj[0:2] = pd.NaT + values = obj._values - # create repeated values, 'n'th element is repeated by n+1 - # times - if 
isinstance(o, (DatetimeIndex, PeriodIndex)): - expected_index = o.copy() - expected_index.name = None + elif needs_i8_conversion(obj): + values[0:2] = iNaT + values = obj._shallow_copy(values) + else: + values[0:2] = null_obj - # attach name to klass - o = klass(values.repeat(range(1, len(o) + 1))) - o.name = "a" - else: - if isinstance(o, DatetimeIndex): - expected_index = orig._values._shallow_copy(values) - else: - expected_index = Index(values) - expected_index.name = None - o = o.repeat(range(1, len(o) + 1)) - o.name = "a" - - # check values has the same dtype as the original - assert o.dtype == orig.dtype - # check values correctly have NaN - nanloc = np.zeros(len(o), dtype=np.bool) - nanloc[:3] = True - if isinstance(o, Index): - tm.assert_numpy_array_equal(pd.isna(o), nanloc) - else: - exp = Series(nanloc, o.index, name="a") - tm.assert_series_equal(pd.isna(o), exp) - - expected_s_na = Series( - list(range(10, 2, -1)) + [3], - index=expected_index[9:0:-1], - dtype="int64", - name="a", - ) - expected_s = Series( - list(range(10, 2, -1)), - index=expected_index[9:1:-1], - dtype="int64", - name="a", - ) + # check values has the same dtype as the original + assert values.dtype == obj.dtype - result_s_na = o.value_counts(dropna=False) - tm.assert_series_equal(result_s_na, expected_s_na) - assert result_s_na.index.name is None - assert result_s_na.name == "a" - result_s = o.value_counts() - tm.assert_series_equal(o.value_counts(), expected_s) - assert result_s.index.name is None - assert result_s.name == "a" + # create repeated values, 'n'th element is repeated by n+1 + # times + if isinstance(obj, (DatetimeIndex, PeriodIndex)): + expected_index = obj.copy() + expected_index.name = None - result = o.unique() - if isinstance(o, Index): - tm.assert_index_equal(result, Index(values[1:], name="a")) - elif is_datetime64tz_dtype(o): - # unable to compare NaT / nan - tm.assert_extension_array_equal(result[1:], values[2:]) - assert result[0] is pd.NaT + # attach name to klass + obj = klass(values.repeat(range(1, len(obj) + 1))) + obj.name = "a" + else: + if isinstance(obj, DatetimeIndex): + expected_index = orig._values._shallow_copy(values) else: - tm.assert_numpy_array_equal(result[1:], values[2:]) - - assert pd.isna(result[0]) - assert result.dtype == orig.dtype + expected_index = Index(values) + expected_index.name = None + obj = obj.repeat(range(1, len(obj) + 1)) + obj.name = "a" + + # check values has the same dtype as the original + assert obj.dtype == orig.dtype + + # check values correctly have NaN + nanloc = np.zeros(len(obj), dtype=np.bool) + nanloc[:3] = True + if isinstance(obj, Index): + tm.assert_numpy_array_equal(pd.isna(obj), nanloc) + else: + exp = Series(nanloc, obj.index, name="a") + tm.assert_series_equal(pd.isna(obj), exp) + + expected_data = list(range(num_values, 2, -1)) + expected_data_na = expected_data.copy() + if expected_data_na: + expected_data_na.append(3) + expected_s_na = Series( + expected_data_na, + index=expected_index[num_values - 1 : 0 : -1], + dtype="int64", + name="a", + ) + expected_s = Series( + expected_data, + index=expected_index[num_values - 1 : 1 : -1], + dtype="int64", + name="a", + ) - assert o.nunique() == 8 - assert o.nunique(dropna=False) == 9 + result_s_na = obj.value_counts(dropna=False) + tm.assert_series_equal(result_s_na, expected_s_na) + assert result_s_na.index.name is None + assert result_s_na.name == "a" + result_s = obj.value_counts() + tm.assert_series_equal(obj.value_counts(), expected_s) + assert result_s.index.name is None + 
assert result_s.name == "a" + + result = obj.unique() + if isinstance(obj, Index): + tm.assert_index_equal(result, Index(values[1:], name="a")) + elif is_datetime64tz_dtype(obj): + # unable to compare NaT / nan + tm.assert_extension_array_equal(result[1:], values[2:]) + assert result[0] is pd.NaT + elif len(obj) > 0: + tm.assert_numpy_array_equal(result[1:], values[2:]) + + assert pd.isna(result[0]) + assert result.dtype == orig.dtype + + assert obj.nunique() == max(0, num_values - 2) + assert obj.nunique(dropna=False) == max(0, num_values - 1) def test_value_counts_inferred(self, index_or_series): klass = index_or_series diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index c9e762af3a303..dca317a9eb03f 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -514,12 +514,12 @@ def test_union_base(self, indices): @pytest.mark.parametrize("sort", [None, False]) def test_difference_base(self, sort, indices): - if isinstance(indices, CategoricalIndex): - return - first = indices[2:] second = indices[:4] - answer = indices[4:] + if isinstance(indices, CategoricalIndex) or indices.is_boolean(): + answer = [] + else: + answer = indices[4:] result = first.difference(second, sort) assert tm.equalContents(result, answer) From 1c142e7c6d17ec847448ffefbfd7b27e2657685b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 24 Feb 2020 06:32:11 -0800 Subject: [PATCH 129/388] REF: de-duplicate factorize and duplicated code (#32216) --- asv_bench/benchmarks/algorithms.py | 91 ++++++++++++------------------ asv_bench/benchmarks/timeseries.py | 14 ----- 2 files changed, 35 insertions(+), 70 deletions(-) diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index 0f3b3838de1b2..1768e682b3db4 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -31,83 +31,62 @@ def time_maybe_convert_objects(self): class Factorize: - params = [[True, False], ["int", "uint", "float", "string"]] - param_names = ["sort", "dtype"] - - def setup(self, sort, dtype): - N = 10 ** 5 - data = { - "int": pd.Int64Index(np.arange(N).repeat(5)), - "uint": pd.UInt64Index(np.arange(N).repeat(5)), - "float": pd.Float64Index(np.random.randn(N).repeat(5)), - "string": tm.makeStringIndex(N).repeat(5), - } - self.idx = data[dtype] - - def time_factorize(self, sort, dtype): - self.idx.factorize(sort=sort) - - -class FactorizeUnique: - - params = [[True, False], ["int", "uint", "float", "string"]] - param_names = ["sort", "dtype"] + params = [ + [True, False], + [True, False], + ["int", "uint", "float", "string", "datetime64[ns]", "datetime64[ns, tz]"], + ] + param_names = ["unique", "sort", "dtype"] - def setup(self, sort, dtype): + def setup(self, unique, sort, dtype): N = 10 ** 5 data = { "int": pd.Int64Index(np.arange(N)), "uint": pd.UInt64Index(np.arange(N)), - "float": pd.Float64Index(np.arange(N)), + "float": pd.Float64Index(np.random.randn(N)), "string": tm.makeStringIndex(N), - } - self.idx = data[dtype] - assert self.idx.is_unique - - def time_factorize(self, sort, dtype): + "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N), + "datetime64[ns, tz]": pd.date_range( + "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo" + ), + }[dtype] + if not unique: + data = data.repeat(5) + self.idx = data + + def time_factorize(self, unique, sort, dtype): self.idx.factorize(sort=sort) class Duplicated: - params = [["first", "last", False], ["int", "uint", "float", "string"]] - param_names = ["keep", "dtype"] - - 
def setup(self, keep, dtype): - N = 10 ** 5 - data = { - "int": pd.Int64Index(np.arange(N).repeat(5)), - "uint": pd.UInt64Index(np.arange(N).repeat(5)), - "float": pd.Float64Index(np.random.randn(N).repeat(5)), - "string": tm.makeStringIndex(N).repeat(5), - } - self.idx = data[dtype] - # cache is_unique - self.idx.is_unique - - def time_duplicated(self, keep, dtype): - self.idx.duplicated(keep=keep) - - -class DuplicatedUniqueIndex: - - params = ["int", "uint", "float", "string"] - param_names = ["dtype"] + params = [ + [True, False], + ["first", "last", False], + ["int", "uint", "float", "string", "datetime64[ns]", "datetime64[ns, tz]"], + ] + param_names = ["unique", "keep", "dtype"] - def setup(self, dtype): + def setup(self, unique, keep, dtype): N = 10 ** 5 data = { "int": pd.Int64Index(np.arange(N)), "uint": pd.UInt64Index(np.arange(N)), "float": pd.Float64Index(np.random.randn(N)), "string": tm.makeStringIndex(N), - } - self.idx = data[dtype] + "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N), + "datetime64[ns, tz]": pd.date_range( + "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo" + ), + }[dtype] + if not unique: + data = data.repeat(5) + self.idx = data # cache is_unique self.idx.is_unique - def time_duplicated_unique(self, dtype): - self.idx.duplicated() + def time_duplicated(self, unique, keep, dtype): + self.idx.duplicated(keep=keep) class Hashing: diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 2f7ea8b9c0873..e3ed33456ee44 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -91,20 +91,6 @@ def time_reest_datetimeindex(self, tz): self.df.reset_index() -class Factorize: - - params = [None, "Asia/Tokyo"] - param_names = "tz" - - def setup(self, tz): - N = 100000 - self.dti = date_range("2011-01-01", freq="H", periods=N, tz=tz) - self.dti = self.dti.repeat(5) - - def time_factorize(self, tz): - self.dti.factorize() - - class InferFreq: params = [None, "D", "B"] From 7d32184611ef570b4996663fa9291d9cc5700a93 Mon Sep 17 00:00:00 2001 From: Sumanau Sareen Date: Mon, 24 Feb 2020 22:02:46 +0530 Subject: [PATCH 130/388] Import OptionError in pandas.errors (#32142) --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/errors/__init__.py | 2 ++ pandas/tests/test_errors.py | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index a4c991dcc166c..705c335acfb48 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -43,7 +43,7 @@ Other enhancements - :class:`Styler` may now render CSS more efficiently where multiple cells have the same styling (:issue:`30876`) - When writing directly to a sqlite connection :func:`to_sql` now supports the ``multi`` method (:issue:`29921`) -- +- `OptionError` is now exposed in `pandas.errors` (:issue:`27553`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index ebe9a3d5bf472..29e69cc5fe509 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -4,6 +4,8 @@ Expose public exceptions & warnings """ +from pandas._config.config import OptionError + from pandas._libs.tslibs import NullFrequencyError, OutOfBoundsDatetime diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py index d72c00ceb0045..515d798fe4322 100644 --- a/pandas/tests/test_errors.py +++ b/pandas/tests/test_errors.py @@ -17,6 +17,7 @@ "EmptyDataError", "ParserWarning", "MergeError", + "OptionError", ], ) def test_exception_importable(exc): From 241bf6099f24c118d3b2cf581139c1616e76d6b4 Mon Sep 17 00:00:00 2001 From: Ram Rachum Date: Tue, 25 Feb 2020 03:52:32 +0200 Subject: [PATCH 131/388] CLN: Fix exception causes in datetimelike.py (#32164) --- pandas/core/arrays/datetimelike.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 854075eaa8d09..f637e16caa4c6 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -777,8 +777,10 @@ def searchsorted(self, value, side="left", sorter=None): if isinstance(value, str): try: value = self._scalar_from_string(value) - except ValueError: - raise TypeError("searchsorted requires compatible dtype or scalar") + except ValueError as e: + raise TypeError( + "searchsorted requires compatible dtype or scalar" + ) from e elif is_valid_nat_for_dtype(value, self.dtype): value = NaT @@ -1041,7 +1043,7 @@ def _validate_frequency(cls, index, freq, **kwargs): raise ValueError( f"Inferred frequency {inferred} from passed values " f"does not conform to passed frequency {freq.freqstr}" - ) + ) from e # monotonicity/uniqueness properties are called via frequencies.infer_freq, # see GH#23789 From 54b400196c4663def2f51720d52fc3408a65e32a Mon Sep 17 00:00:00 2001 From: Achmad Syarif Hidayatullah <30652154+asyarif93@users.noreply.github.com> Date: Tue, 25 Feb 2020 08:54:45 +0700 Subject: [PATCH 132/388] DOC: Update pandas.DataFrame.droplevel docstring (#32004) --- ci/code_checks.sh | 2 +- pandas/core/generic.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index bb7d8a388e6e2..e2dc543360a62 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -269,7 +269,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then MSG='Doctests generic.py' ; echo $MSG pytest -q --doctest-modules pandas/core/generic.py \ - -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard" + -k"-_set_axis_name -_xs -describe -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard" RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Doctests groupby.py' ; echo $MSG diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a6ab0d4034ddb..ff7c481d550d4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -602,6 +602,10 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries: of levels. axis : {0 or 'index', 1 or 'columns'}, default 0 + Axis along which the level(s) is removed: + + * 0 or 'index': remove level(s) in column. + * 1 or 'columns': remove level(s) in row. 
Returns ------- @@ -617,7 +621,7 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries: ... ]).set_index([0, 1]).rename_axis(['a', 'b']) >>> df.columns = pd.MultiIndex.from_tuples([ - ... ('c', 'e'), ('d', 'f') + ... ('c', 'e'), ('d', 'f') ... ], names=['level_1', 'level_2']) >>> df @@ -636,7 +640,7 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries: 6 7 8 10 11 12 - >>> df.droplevel('level2', axis=1) + >>> df.droplevel('level_2', axis=1) level_1 c d a b 1 2 3 4 From ee9cb758a7aed4569c97b47f8aa44913316c29ec Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Feb 2020 10:47:34 -0800 Subject: [PATCH 133/388] troubleshoot 32bit build (#32241) --- ci/setup_env.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/setup_env.sh b/ci/setup_env.sh index e5bee09fe2f79..ae39b0dda5d09 100755 --- a/ci/setup_env.sh +++ b/ci/setup_env.sh @@ -50,7 +50,7 @@ echo echo "update conda" conda config --set ssl_verify false conda config --set quiet true --set always_yes true --set changeps1 false -conda install pip # create conda to create a historical artifact for pip & setuptools +conda install pip conda # create conda to create a historical artifact for pip & setuptools conda update -n base conda echo "conda info -a" From e88629f0c1cc09016f68757f82b0ef42800fe855 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Tue, 25 Feb 2020 19:51:25 +0100 Subject: [PATCH 134/388] CI: Remove float16 fixture value (#32221) --- pandas/tests/base/test_ops.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index 625d559001e72..f85d823cb2fac 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -277,6 +277,12 @@ def test_value_counts_unique_nunique_null(self, null_obj, index_or_series_obj): pytest.skip(f"values of {klass} cannot be changed") elif isinstance(orig, pd.MultiIndex): pytest.skip("MultiIndex doesn't support isna") + elif orig.duplicated().any(): + pytest.xfail( + "The test implementation isn't flexible enough to deal" + " with duplicated values. This isn't a bug in the" + " application code, but in the test code." 
+ ) # special assign to the numpy array if is_datetime64tz_dtype(obj): From 69f4c70fd551a4d1bbdcf490109b999cc97e4ce5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Feb 2020 12:40:23 -0800 Subject: [PATCH 135/388] REF/TST: method-specific files for DataFrame timeseries methods (#32230) --- pandas/tests/frame/conftest.py | 7 + pandas/tests/frame/methods/test_asfreq.py | 58 +++ pandas/tests/frame/methods/test_at_time.py | 86 +++++ .../tests/frame/methods/test_between_time.py | 110 ++++++ pandas/tests/frame/methods/test_to_period.py | 36 ++ pandas/tests/frame/methods/test_tz_convert.py | 84 +++++ .../tests/frame/methods/test_tz_localize.py | 21 ++ pandas/tests/frame/test_timeseries.py | 339 +----------------- pandas/tests/frame/test_timezones.py | 28 -- 9 files changed, 403 insertions(+), 366 deletions(-) create mode 100644 pandas/tests/frame/methods/test_asfreq.py create mode 100644 pandas/tests/frame/methods/test_at_time.py create mode 100644 pandas/tests/frame/methods/test_between_time.py create mode 100644 pandas/tests/frame/methods/test_to_period.py create mode 100644 pandas/tests/frame/methods/test_tz_convert.py create mode 100644 pandas/tests/frame/methods/test_tz_localize.py diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index 774eb443c45fe..03598b6bb5eca 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -1,3 +1,5 @@ +from itertools import product + import numpy as np import pytest @@ -5,6 +7,11 @@ import pandas._testing as tm +@pytest.fixture(params=product([True, False], [True, False])) +def close_open_fixture(request): + return request.param + + @pytest.fixture def float_frame_with_na(): """ diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py new file mode 100644 index 0000000000000..40b0ec0c0d811 --- /dev/null +++ b/pandas/tests/frame/methods/test_asfreq.py @@ -0,0 +1,58 @@ +from datetime import datetime + +import numpy as np + +from pandas import DataFrame, DatetimeIndex, Series, date_range +import pandas._testing as tm + +from pandas.tseries import offsets + + +class TestAsFreq: + def test_asfreq(self, datetime_frame): + offset_monthly = datetime_frame.asfreq(offsets.BMonthEnd()) + rule_monthly = datetime_frame.asfreq("BM") + + tm.assert_almost_equal(offset_monthly["A"], rule_monthly["A"]) + + filled = rule_monthly.asfreq("B", method="pad") # noqa + # TODO: actually check that this worked. + + # don't forget! 
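        # (both "filled" and the following "filled_dep" exercise the same
        # upsampling path; as a minimal sketch with made-up values, not this
        # test's data:
        #   ser = Series([1.0, 2.0], index=date_range("2000-01-01", periods=2, freq="2D"))
        #   ser.asfreq("D")                # the new 2000-01-02 slot is NaN
        #   ser.asfreq("D", method="pad")  # ...forward-filled to 1.0 instead)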
+ filled_dep = rule_monthly.asfreq("B", method="pad") # noqa + + # test does not blow up on length-0 DataFrame + zero_length = datetime_frame.reindex([]) + result = zero_length.asfreq("BM") + assert result is not zero_length + + def test_asfreq_datetimeindex(self): + df = DataFrame( + {"A": [1, 2, 3]}, + index=[datetime(2011, 11, 1), datetime(2011, 11, 2), datetime(2011, 11, 3)], + ) + df = df.asfreq("B") + assert isinstance(df.index, DatetimeIndex) + + ts = df["A"].asfreq("B") + assert isinstance(ts.index, DatetimeIndex) + + def test_asfreq_fillvalue(self): + # test for fill value during upsampling, related to issue 3715 + + # setup + rng = date_range("1/1/2016", periods=10, freq="2S") + ts = Series(np.arange(len(rng)), index=rng) + df = DataFrame({"one": ts}) + + # insert pre-existing missing value + df.loc["2016-01-01 00:00:08", "one"] = None + + actual_df = df.asfreq(freq="1S", fill_value=9.0) + expected_df = df.asfreq(freq="1S").fillna(9.0) + expected_df.loc["2016-01-01 00:00:08", "one"] = None + tm.assert_frame_equal(expected_df, actual_df) + + expected_series = ts.asfreq(freq="1S").fillna(9.0) + actual_series = ts.asfreq(freq="1S", fill_value=9.0) + tm.assert_series_equal(expected_series, actual_series) diff --git a/pandas/tests/frame/methods/test_at_time.py b/pandas/tests/frame/methods/test_at_time.py new file mode 100644 index 0000000000000..108bbbfa183c4 --- /dev/null +++ b/pandas/tests/frame/methods/test_at_time.py @@ -0,0 +1,86 @@ +from datetime import time + +import numpy as np +import pytest +import pytz + +from pandas import DataFrame, date_range +import pandas._testing as tm + + +class TestAtTime: + def test_at_time(self): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + rs = ts.at_time(rng[1]) + assert (rs.index.hour == rng[1].hour).all() + assert (rs.index.minute == rng[1].minute).all() + assert (rs.index.second == rng[1].second).all() + + result = ts.at_time("9:30") + expected = ts.at_time(time(9, 30)) + tm.assert_frame_equal(result, expected) + + result = ts.loc[time(9, 30)] + expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)] + + tm.assert_frame_equal(result, expected) + + # midnight, everything + rng = date_range("1/1/2000", "1/31/2000") + ts = DataFrame(np.random.randn(len(rng), 3), index=rng) + + result = ts.at_time(time(0, 0)) + tm.assert_frame_equal(result, ts) + + # time doesn't exist + rng = date_range("1/1/2012", freq="23Min", periods=384) + ts = DataFrame(np.random.randn(len(rng), 2), rng) + rs = ts.at_time("16:00") + assert len(rs) == 0 + + @pytest.mark.parametrize( + "hour", ["1:00", "1:00AM", time(1), time(1, tzinfo=pytz.UTC)] + ) + def test_at_time_errors(self, hour): + # GH#24043 + dti = date_range("2018", periods=3, freq="H") + df = DataFrame(list(range(len(dti))), index=dti) + if getattr(hour, "tzinfo", None) is None: + result = df.at_time(hour) + expected = df.iloc[1:2] + tm.assert_frame_equal(result, expected) + else: + with pytest.raises(ValueError, match="Index must be timezone"): + df.at_time(hour) + + def test_at_time_tz(self): + # GH#24043 + dti = date_range("2018", periods=3, freq="H", tz="US/Pacific") + df = DataFrame(list(range(len(dti))), index=dti) + result = df.at_time(time(4, tzinfo=pytz.timezone("US/Eastern"))) + expected = df.iloc[1:2] + tm.assert_frame_equal(result, expected) + + def test_at_time_raises(self): + # GH#20725 + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError): # index is not a DatetimeIndex + df.at_time("00:00") + + 
@pytest.mark.parametrize("axis", ["index", "columns", 0, 1]) + def test_at_time_axis(self, axis): + # issue 8839 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), len(rng))) + ts.index, ts.columns = rng, rng + + indices = rng[(rng.hour == 9) & (rng.minute == 30) & (rng.second == 0)] + + if axis in ["index", 0]: + expected = ts.loc[indices, :] + elif axis in ["columns", 1]: + expected = ts.loc[:, indices] + + result = ts.at_time("9:30", axis=axis) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_between_time.py b/pandas/tests/frame/methods/test_between_time.py new file mode 100644 index 0000000000000..b40604b4f4a16 --- /dev/null +++ b/pandas/tests/frame/methods/test_between_time.py @@ -0,0 +1,110 @@ +from datetime import time + +import numpy as np +import pytest + +from pandas import DataFrame, date_range +import pandas._testing as tm + + +class TestBetweenTime: + def test_between_time(self, close_open_fixture): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + stime = time(0, 0) + etime = time(1, 0) + inc_start, inc_end = close_open_fixture + + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = 13 * 4 + 1 + if not inc_start: + exp_len -= 5 + if not inc_end: + exp_len -= 4 + + assert len(filtered) == exp_len + for rs in filtered.index: + t = rs.time() + if inc_start: + assert t >= stime + else: + assert t > stime + + if inc_end: + assert t <= etime + else: + assert t < etime + + result = ts.between_time("00:00", "01:00") + expected = ts.between_time(stime, etime) + tm.assert_frame_equal(result, expected) + + # across midnight + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + stime = time(22, 0) + etime = time(9, 0) + + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = (12 * 11 + 1) * 4 + 1 + if not inc_start: + exp_len -= 4 + if not inc_end: + exp_len -= 4 + + assert len(filtered) == exp_len + for rs in filtered.index: + t = rs.time() + if inc_start: + assert (t >= stime) or (t <= etime) + else: + assert (t > stime) or (t <= etime) + + if inc_end: + assert (t <= etime) or (t >= stime) + else: + assert (t < etime) or (t >= stime) + + def test_between_time_raises(self): + # GH#20725 + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError): # index is not a DatetimeIndex + df.between_time(start_time="00:00", end_time="12:00") + + def test_between_time_axis(self, axis): + # GH#8839 + rng = date_range("1/1/2000", periods=100, freq="10min") + ts = DataFrame(np.random.randn(len(rng), len(rng))) + stime, etime = ("08:00:00", "09:00:00") + exp_len = 7 + + if axis in ["index", 0]: + ts.index = rng + assert len(ts.between_time(stime, etime)) == exp_len + assert len(ts.between_time(stime, etime, axis=0)) == exp_len + + if axis in ["columns", 1]: + ts.columns = rng + selected = ts.between_time(stime, etime, axis=1).columns + assert len(selected) == exp_len + + def test_between_time_axis_raises(self, axis): + # issue 8839 + rng = date_range("1/1/2000", periods=100, freq="10min") + mask = np.arange(0, len(rng)) + rand_data = np.random.randn(len(rng), len(rng)) + ts = DataFrame(rand_data, index=rng, columns=rng) + stime, etime = ("08:00:00", "09:00:00") + + msg = "Index must be DatetimeIndex" + if axis in ["columns", 1]: + ts.index = mask + with pytest.raises(TypeError, match=msg): + ts.between_time(stime, etime) + with 
pytest.raises(TypeError, match=msg): + ts.between_time(stime, etime, axis=0) + + if axis in ["index", 0]: + ts.columns = mask + with pytest.raises(TypeError, match=msg): + ts.between_time(stime, etime, axis=1) diff --git a/pandas/tests/frame/methods/test_to_period.py b/pandas/tests/frame/methods/test_to_period.py new file mode 100644 index 0000000000000..eac78e611b008 --- /dev/null +++ b/pandas/tests/frame/methods/test_to_period.py @@ -0,0 +1,36 @@ +import numpy as np +import pytest + +from pandas import DataFrame, date_range, period_range +import pandas._testing as tm + + +class TestToPeriod: + def test_frame_to_period(self): + K = 5 + + dr = date_range("1/1/2000", "1/1/2001") + pr = period_range("1/1/2000", "1/1/2001") + df = DataFrame(np.random.randn(len(dr), K), index=dr) + df["mix"] = "a" + + pts = df.to_period() + exp = df.copy() + exp.index = pr + tm.assert_frame_equal(pts, exp) + + pts = df.to_period("M") + tm.assert_index_equal(pts.index, exp.index.asfreq("M")) + + df = df.T + pts = df.to_period(axis=1) + exp = df.copy() + exp.columns = pr + tm.assert_frame_equal(pts, exp) + + pts = df.to_period("M", axis=1) + tm.assert_index_equal(pts.columns, exp.columns.asfreq("M")) + + msg = "No axis named 2 for object type " + with pytest.raises(ValueError, match=msg): + df.to_period(axis=2) diff --git a/pandas/tests/frame/methods/test_tz_convert.py b/pandas/tests/frame/methods/test_tz_convert.py new file mode 100644 index 0000000000000..ea8c4b88538d4 --- /dev/null +++ b/pandas/tests/frame/methods/test_tz_convert.py @@ -0,0 +1,84 @@ +import numpy as np +import pytest + +from pandas import DataFrame, Index, MultiIndex, date_range +import pandas._testing as tm + + +class TestTZConvert: + def test_frame_tz_convert(self): + rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern") + + df = DataFrame({"a": 1}, index=rng) + result = df.tz_convert("Europe/Berlin") + expected = DataFrame({"a": 1}, rng.tz_convert("Europe/Berlin")) + assert result.index.tz.zone == "Europe/Berlin" + tm.assert_frame_equal(result, expected) + + df = df.T + result = df.tz_convert("Europe/Berlin", axis=1) + assert result.columns.tz.zone == "Europe/Berlin" + tm.assert_frame_equal(result, expected.T) + + @pytest.mark.parametrize("fn", ["tz_localize", "tz_convert"]) + def test_tz_convert_and_localize(self, fn): + l0 = date_range("20140701", periods=5, freq="D") + l1 = date_range("20140701", periods=5, freq="D") + + int_idx = Index(range(5)) + + if fn == "tz_convert": + l0 = l0.tz_localize("UTC") + l1 = l1.tz_localize("UTC") + + for idx in [l0, l1]: + + l0_expected = getattr(idx, fn)("US/Pacific") + l1_expected = getattr(idx, fn)("US/Pacific") + + df1 = DataFrame(np.ones(5), index=l0) + df1 = getattr(df1, fn)("US/Pacific") + tm.assert_index_equal(df1.index, l0_expected) + + # MultiIndex + # GH7846 + df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1])) + + df3 = getattr(df2, fn)("US/Pacific", level=0) + assert not df3.index.levels[0].equals(l0) + tm.assert_index_equal(df3.index.levels[0], l0_expected) + tm.assert_index_equal(df3.index.levels[1], l1) + assert not df3.index.levels[1].equals(l1_expected) + + df3 = getattr(df2, fn)("US/Pacific", level=1) + tm.assert_index_equal(df3.index.levels[0], l0) + assert not df3.index.levels[0].equals(l0_expected) + tm.assert_index_equal(df3.index.levels[1], l1_expected) + assert not df3.index.levels[1].equals(l1) + + df4 = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) + + # TODO: untested + df5 = getattr(df4, fn)("US/Pacific", level=1) # noqa + + 
tm.assert_index_equal(df3.index.levels[0], l0) + assert not df3.index.levels[0].equals(l0_expected) + tm.assert_index_equal(df3.index.levels[1], l1_expected) + assert not df3.index.levels[1].equals(l1) + + # Bad Inputs + + # Not DatetimeIndex / PeriodIndex + with pytest.raises(TypeError, match="DatetimeIndex"): + df = DataFrame(index=int_idx) + df = getattr(df, fn)("US/Pacific") + + # Not DatetimeIndex / PeriodIndex + with pytest.raises(TypeError, match="DatetimeIndex"): + df = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) + df = getattr(df, fn)("US/Pacific", level=0) + + # Invalid level + with pytest.raises(ValueError, match="not valid"): + df = DataFrame(index=l0) + df = getattr(df, fn)("US/Pacific", level=1) diff --git a/pandas/tests/frame/methods/test_tz_localize.py b/pandas/tests/frame/methods/test_tz_localize.py new file mode 100644 index 0000000000000..1d4e26a6999b7 --- /dev/null +++ b/pandas/tests/frame/methods/test_tz_localize.py @@ -0,0 +1,21 @@ +from pandas import DataFrame, date_range +import pandas._testing as tm + + +class TestTZLocalize: + # See also: + # test_tz_convert_and_localize in test_tz_convert + + def test_frame_tz_localize(self): + rng = date_range("1/1/2011", periods=100, freq="H") + + df = DataFrame({"a": 1}, index=rng) + result = df.tz_localize("utc") + expected = DataFrame({"a": 1}, rng.tz_localize("UTC")) + assert result.index.tz.zone == "UTC" + tm.assert_frame_equal(result, expected) + + df = df.T + result = df.tz_localize("utc", axis=1) + assert result.columns.tz.zone == "UTC" + tm.assert_frame_equal(result, expected.T) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 5e06b6402c34f..b713af92eac27 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -1,30 +1,10 @@ -from datetime import datetime, time -from itertools import product - import numpy as np import pytest -import pytz import pandas as pd -from pandas import ( - DataFrame, - DatetimeIndex, - Index, - MultiIndex, - Series, - date_range, - period_range, - to_datetime, -) +from pandas import DataFrame, Series, date_range, to_datetime import pandas._testing as tm -import pandas.tseries.offsets as offsets - - -@pytest.fixture(params=product([True, False], [True, False])) -def close_open_fixture(request): - return request.param - class TestDataFrameTimeSeriesMethods: def test_frame_ctor_datetime64_column(self): @@ -80,54 +60,6 @@ def test_frame_append_datetime64_col_other_units(self): assert (tmp["dates"].values == ex_vals).all() - def test_asfreq(self, datetime_frame): - offset_monthly = datetime_frame.asfreq(offsets.BMonthEnd()) - rule_monthly = datetime_frame.asfreq("BM") - - tm.assert_almost_equal(offset_monthly["A"], rule_monthly["A"]) - - filled = rule_monthly.asfreq("B", method="pad") # noqa - # TODO: actually check that this worked. - - # don't forget! 
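        # ("B" is pandas' business-day frequency alias; this assignment and the
        # one below carry "# noqa" because their results are never asserted on
        # and, per the TODO above, appear only to verify that upsampling
        # month-end data to business days runs without raising.)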
- filled_dep = rule_monthly.asfreq("B", method="pad") # noqa - - # test does not blow up on length-0 DataFrame - zero_length = datetime_frame.reindex([]) - result = zero_length.asfreq("BM") - assert result is not zero_length - - def test_asfreq_datetimeindex(self): - df = DataFrame( - {"A": [1, 2, 3]}, - index=[datetime(2011, 11, 1), datetime(2011, 11, 2), datetime(2011, 11, 3)], - ) - df = df.asfreq("B") - assert isinstance(df.index, DatetimeIndex) - - ts = df["A"].asfreq("B") - assert isinstance(ts.index, DatetimeIndex) - - def test_asfreq_fillvalue(self): - # test for fill value during upsampling, related to issue 3715 - - # setup - rng = pd.date_range("1/1/2016", periods=10, freq="2S") - ts = pd.Series(np.arange(len(rng)), index=rng) - df = pd.DataFrame({"one": ts}) - - # insert pre-existing missing value - df.loc["2016-01-01 00:00:08", "one"] = None - - actual_df = df.asfreq(freq="1S", fill_value=9.0) - expected_df = df.asfreq(freq="1S").fillna(9.0) - expected_df.loc["2016-01-01 00:00:08", "one"] = None - tm.assert_frame_equal(expected_df, actual_df) - - expected_series = ts.asfreq(freq="1S").fillna(9.0) - actual_series = ts.asfreq(freq="1S", fill_value=9.0) - tm.assert_series_equal(expected_series, actual_series) - @pytest.mark.parametrize( "data,idx,expected_first,expected_last", [ @@ -239,183 +171,6 @@ def test_last_raises(self): with pytest.raises(TypeError): # index is not a DatetimeIndex df.last("1D") - def test_at_time(self): - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) - rs = ts.at_time(rng[1]) - assert (rs.index.hour == rng[1].hour).all() - assert (rs.index.minute == rng[1].minute).all() - assert (rs.index.second == rng[1].second).all() - - result = ts.at_time("9:30") - expected = ts.at_time(time(9, 30)) - tm.assert_frame_equal(result, expected) - - result = ts.loc[time(9, 30)] - expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)] - - tm.assert_frame_equal(result, expected) - - # midnight, everything - rng = date_range("1/1/2000", "1/31/2000") - ts = DataFrame(np.random.randn(len(rng), 3), index=rng) - - result = ts.at_time(time(0, 0)) - tm.assert_frame_equal(result, ts) - - # time doesn't exist - rng = date_range("1/1/2012", freq="23Min", periods=384) - ts = DataFrame(np.random.randn(len(rng), 2), rng) - rs = ts.at_time("16:00") - assert len(rs) == 0 - - @pytest.mark.parametrize( - "hour", ["1:00", "1:00AM", time(1), time(1, tzinfo=pytz.UTC)] - ) - def test_at_time_errors(self, hour): - # GH 24043 - dti = pd.date_range("2018", periods=3, freq="H") - df = pd.DataFrame(list(range(len(dti))), index=dti) - if getattr(hour, "tzinfo", None) is None: - result = df.at_time(hour) - expected = df.iloc[1:2] - tm.assert_frame_equal(result, expected) - else: - with pytest.raises(ValueError, match="Index must be timezone"): - df.at_time(hour) - - def test_at_time_tz(self): - # GH 24043 - dti = pd.date_range("2018", periods=3, freq="H", tz="US/Pacific") - df = pd.DataFrame(list(range(len(dti))), index=dti) - result = df.at_time(time(4, tzinfo=pytz.timezone("US/Eastern"))) - expected = df.iloc[1:2] - tm.assert_frame_equal(result, expected) - - def test_at_time_raises(self): - # GH20725 - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) - with pytest.raises(TypeError): # index is not a DatetimeIndex - df.at_time("00:00") - - @pytest.mark.parametrize("axis", ["index", "columns", 0, 1]) - def test_at_time_axis(self, axis): - # issue 8839 - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = 
DataFrame(np.random.randn(len(rng), len(rng))) - ts.index, ts.columns = rng, rng - - indices = rng[(rng.hour == 9) & (rng.minute == 30) & (rng.second == 0)] - - if axis in ["index", 0]: - expected = ts.loc[indices, :] - elif axis in ["columns", 1]: - expected = ts.loc[:, indices] - - result = ts.at_time("9:30", axis=axis) - tm.assert_frame_equal(result, expected) - - def test_between_time(self, close_open_fixture): - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) - stime = time(0, 0) - etime = time(1, 0) - inc_start, inc_end = close_open_fixture - - filtered = ts.between_time(stime, etime, inc_start, inc_end) - exp_len = 13 * 4 + 1 - if not inc_start: - exp_len -= 5 - if not inc_end: - exp_len -= 4 - - assert len(filtered) == exp_len - for rs in filtered.index: - t = rs.time() - if inc_start: - assert t >= stime - else: - assert t > stime - - if inc_end: - assert t <= etime - else: - assert t < etime - - result = ts.between_time("00:00", "01:00") - expected = ts.between_time(stime, etime) - tm.assert_frame_equal(result, expected) - - # across midnight - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) - stime = time(22, 0) - etime = time(9, 0) - - filtered = ts.between_time(stime, etime, inc_start, inc_end) - exp_len = (12 * 11 + 1) * 4 + 1 - if not inc_start: - exp_len -= 4 - if not inc_end: - exp_len -= 4 - - assert len(filtered) == exp_len - for rs in filtered.index: - t = rs.time() - if inc_start: - assert (t >= stime) or (t <= etime) - else: - assert (t > stime) or (t <= etime) - - if inc_end: - assert (t <= etime) or (t >= stime) - else: - assert (t < etime) or (t >= stime) - - def test_between_time_raises(self): - # GH20725 - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) - with pytest.raises(TypeError): # index is not a DatetimeIndex - df.between_time(start_time="00:00", end_time="12:00") - - def test_between_time_axis(self, axis): - # issue 8839 - rng = date_range("1/1/2000", periods=100, freq="10min") - ts = DataFrame(np.random.randn(len(rng), len(rng))) - stime, etime = ("08:00:00", "09:00:00") - exp_len = 7 - - if axis in ["index", 0]: - ts.index = rng - assert len(ts.between_time(stime, etime)) == exp_len - assert len(ts.between_time(stime, etime, axis=0)) == exp_len - - if axis in ["columns", 1]: - ts.columns = rng - selected = ts.between_time(stime, etime, axis=1).columns - assert len(selected) == exp_len - - def test_between_time_axis_raises(self, axis): - # issue 8839 - rng = date_range("1/1/2000", periods=100, freq="10min") - mask = np.arange(0, len(rng)) - rand_data = np.random.randn(len(rng), len(rng)) - ts = DataFrame(rand_data, index=rng, columns=rng) - stime, etime = ("08:00:00", "09:00:00") - - msg = "Index must be DatetimeIndex" - if axis in ["columns", 1]: - ts.index = mask - with pytest.raises(TypeError, match=msg): - ts.between_time(stime, etime) - with pytest.raises(TypeError, match=msg): - ts.between_time(stime, etime, axis=0) - - if axis in ["index", 0]: - ts.columns = mask - with pytest.raises(TypeError, match=msg): - ts.between_time(stime, etime, axis=1) - def test_operation_on_NaT(self): # Both NaT and Timestamp are in DataFrame. 
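        # (NaT is the missing-value marker for datetimelike data; reductions
        # such as min/max skip it the way numeric reductions skip NaN. As a
        # minimal sketch, separate from the test data:
        #   pd.Series([pd.NaT, pd.Timestamp("2012-05-01")]).min()
        #   # -> Timestamp("2012-05-01"), not NaT)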
df = pd.DataFrame({"foo": [pd.NaT, pd.NaT, pd.Timestamp("2012-05-01")]}) @@ -455,95 +210,3 @@ def test_datetime_assignment_with_NaT_and_diff_time_units(self): {0: [1, None], "new": [1e9, None]}, dtype="datetime64[ns]" ) tm.assert_frame_equal(result, expected) - - def test_frame_to_period(self): - K = 5 - - dr = date_range("1/1/2000", "1/1/2001") - pr = period_range("1/1/2000", "1/1/2001") - df = DataFrame(np.random.randn(len(dr), K), index=dr) - df["mix"] = "a" - - pts = df.to_period() - exp = df.copy() - exp.index = pr - tm.assert_frame_equal(pts, exp) - - pts = df.to_period("M") - tm.assert_index_equal(pts.index, exp.index.asfreq("M")) - - df = df.T - pts = df.to_period(axis=1) - exp = df.copy() - exp.columns = pr - tm.assert_frame_equal(pts, exp) - - pts = df.to_period("M", axis=1) - tm.assert_index_equal(pts.columns, exp.columns.asfreq("M")) - - msg = "No axis named 2 for object type " - with pytest.raises(ValueError, match=msg): - df.to_period(axis=2) - - @pytest.mark.parametrize("fn", ["tz_localize", "tz_convert"]) - def test_tz_convert_and_localize(self, fn): - l0 = date_range("20140701", periods=5, freq="D") - l1 = date_range("20140701", periods=5, freq="D") - - int_idx = Index(range(5)) - - if fn == "tz_convert": - l0 = l0.tz_localize("UTC") - l1 = l1.tz_localize("UTC") - - for idx in [l0, l1]: - - l0_expected = getattr(idx, fn)("US/Pacific") - l1_expected = getattr(idx, fn)("US/Pacific") - - df1 = DataFrame(np.ones(5), index=l0) - df1 = getattr(df1, fn)("US/Pacific") - tm.assert_index_equal(df1.index, l0_expected) - - # MultiIndex - # GH7846 - df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1])) - - df3 = getattr(df2, fn)("US/Pacific", level=0) - assert not df3.index.levels[0].equals(l0) - tm.assert_index_equal(df3.index.levels[0], l0_expected) - tm.assert_index_equal(df3.index.levels[1], l1) - assert not df3.index.levels[1].equals(l1_expected) - - df3 = getattr(df2, fn)("US/Pacific", level=1) - tm.assert_index_equal(df3.index.levels[0], l0) - assert not df3.index.levels[0].equals(l0_expected) - tm.assert_index_equal(df3.index.levels[1], l1_expected) - assert not df3.index.levels[1].equals(l1) - - df4 = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) - - # TODO: untested - df5 = getattr(df4, fn)("US/Pacific", level=1) # noqa - - tm.assert_index_equal(df3.index.levels[0], l0) - assert not df3.index.levels[0].equals(l0_expected) - tm.assert_index_equal(df3.index.levels[1], l1_expected) - assert not df3.index.levels[1].equals(l1) - - # Bad Inputs - - # Not DatetimeIndex / PeriodIndex - with pytest.raises(TypeError, match="DatetimeIndex"): - df = DataFrame(index=int_idx) - df = getattr(df, fn)("US/Pacific") - - # Not DatetimeIndex / PeriodIndex - with pytest.raises(TypeError, match="DatetimeIndex"): - df = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) - df = getattr(df, fn)("US/Pacific", level=0) - - # Invalid level - with pytest.raises(ValueError, match="not valid"): - df = DataFrame(index=l0) - df = getattr(df, fn)("US/Pacific", level=1) diff --git a/pandas/tests/frame/test_timezones.py b/pandas/tests/frame/test_timezones.py index b60f2052a988f..62e8a4b470218 100644 --- a/pandas/tests/frame/test_timezones.py +++ b/pandas/tests/frame/test_timezones.py @@ -59,34 +59,6 @@ def test_frame_from_records_utc(self): # it works DataFrame.from_records([rec], index="begin_time") - def test_frame_tz_localize(self): - rng = date_range("1/1/2011", periods=100, freq="H") - - df = DataFrame({"a": 1}, index=rng) - result = df.tz_localize("utc") - expected = 
DataFrame({"a": 1}, rng.tz_localize("UTC")) - assert result.index.tz.zone == "UTC" - tm.assert_frame_equal(result, expected) - - df = df.T - result = df.tz_localize("utc", axis=1) - assert result.columns.tz.zone == "UTC" - tm.assert_frame_equal(result, expected.T) - - def test_frame_tz_convert(self): - rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern") - - df = DataFrame({"a": 1}, index=rng) - result = df.tz_convert("Europe/Berlin") - expected = DataFrame({"a": 1}, rng.tz_convert("Europe/Berlin")) - assert result.index.tz.zone == "Europe/Berlin" - tm.assert_frame_equal(result, expected) - - df = df.T - result = df.tz_convert("Europe/Berlin", axis=1) - assert result.columns.tz.zone == "Europe/Berlin" - tm.assert_frame_equal(result, expected.T) - def test_frame_join_tzaware(self): test1 = DataFrame( np.zeros((6, 3)), From cf993fd548243054a37cfdc805130aa8a263b65b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Feb 2020 12:41:12 -0800 Subject: [PATCH 136/388] REF/TST: method-specific files for rename, reset_index (#32227) --- pandas/tests/frame/methods/test_rename.py | 353 ++++++++++ .../tests/frame/methods/test_reset_index.py | 299 +++++++++ pandas/tests/frame/test_alter_axes.py | 628 ------------------ pandas/tests/series/methods/test_rename.py | 91 +++ .../tests/series/methods/test_reset_index.py | 110 +++ pandas/tests/series/test_alter_axes.py | 187 +----- 6 files changed, 854 insertions(+), 814 deletions(-) create mode 100644 pandas/tests/frame/methods/test_rename.py create mode 100644 pandas/tests/frame/methods/test_reset_index.py create mode 100644 pandas/tests/series/methods/test_rename.py create mode 100644 pandas/tests/series/methods/test_reset_index.py diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py new file mode 100644 index 0000000000000..e69a562f8214d --- /dev/null +++ b/pandas/tests/frame/methods/test_rename.py @@ -0,0 +1,353 @@ +from collections import ChainMap + +import numpy as np +import pytest + +from pandas import DataFrame, Index, MultiIndex +import pandas._testing as tm + + +class TestRename: + def test_rename(self, float_frame): + mapping = {"A": "a", "B": "b", "C": "c", "D": "d"} + + renamed = float_frame.rename(columns=mapping) + renamed2 = float_frame.rename(columns=str.lower) + + tm.assert_frame_equal(renamed, renamed2) + tm.assert_frame_equal( + renamed2.rename(columns=str.upper), float_frame, check_names=False + ) + + # index + data = {"A": {"foo": 0, "bar": 1}} + + # gets sorted alphabetical + df = DataFrame(data) + renamed = df.rename(index={"foo": "bar", "bar": "foo"}) + tm.assert_index_equal(renamed.index, Index(["foo", "bar"])) + + renamed = df.rename(index=str.upper) + tm.assert_index_equal(renamed.index, Index(["BAR", "FOO"])) + + # have to pass something + with pytest.raises(TypeError, match="must pass an index to rename"): + float_frame.rename() + + # partial columns + renamed = float_frame.rename(columns={"C": "foo", "D": "bar"}) + tm.assert_index_equal(renamed.columns, Index(["A", "B", "foo", "bar"])) + + # other axis + renamed = float_frame.T.rename(index={"C": "foo", "D": "bar"}) + tm.assert_index_equal(renamed.index, Index(["A", "B", "foo", "bar"])) + + # index with name + index = Index(["foo", "bar"], name="name") + renamer = DataFrame(data, index=index) + renamed = renamer.rename(index={"foo": "bar", "bar": "foo"}) + tm.assert_index_equal(renamed.index, Index(["bar", "foo"], name="name")) + assert renamed.index.name == renamer.index.name + + @pytest.mark.parametrize( + 
"args,kwargs", + [ + ((ChainMap({"A": "a"}, {"B": "b"}),), dict(axis="columns")), + ((), dict(columns=ChainMap({"A": "a"}, {"B": "b"}))), + ], + ) + def test_rename_chainmap(self, args, kwargs): + # see gh-23859 + colAData = range(1, 11) + colBdata = np.random.randn(10) + + df = DataFrame({"A": colAData, "B": colBdata}) + result = df.rename(*args, **kwargs) + + expected = DataFrame({"a": colAData, "b": colBdata}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "kwargs, rename_index, rename_columns", + [ + ({"mapper": None, "axis": 0}, True, False), + ({"mapper": None, "axis": 1}, False, True), + ({"index": None}, True, False), + ({"columns": None}, False, True), + ({"index": None, "columns": None}, True, True), + ({}, False, False), + ], + ) + def test_rename_axis_none(self, kwargs, rename_index, rename_columns): + # GH 25034 + index = Index(list("abc"), name="foo") + columns = Index(["col1", "col2"], name="bar") + data = np.arange(6).reshape(3, 2) + df = DataFrame(data, index, columns) + + result = df.rename_axis(**kwargs) + expected_index = index.rename(None) if rename_index else index + expected_columns = columns.rename(None) if rename_columns else columns + expected = DataFrame(data, expected_index, expected_columns) + tm.assert_frame_equal(result, expected) + + def test_rename_multiindex(self): + + tuples_index = [("foo1", "bar1"), ("foo2", "bar2")] + tuples_columns = [("fizz1", "buzz1"), ("fizz2", "buzz2")] + index = MultiIndex.from_tuples(tuples_index, names=["foo", "bar"]) + columns = MultiIndex.from_tuples(tuples_columns, names=["fizz", "buzz"]) + df = DataFrame([(0, 0), (1, 1)], index=index, columns=columns) + + # + # without specifying level -> across all levels + + renamed = df.rename( + index={"foo1": "foo3", "bar2": "bar3"}, + columns={"fizz1": "fizz3", "buzz2": "buzz3"}, + ) + new_index = MultiIndex.from_tuples( + [("foo3", "bar1"), ("foo2", "bar3")], names=["foo", "bar"] + ) + new_columns = MultiIndex.from_tuples( + [("fizz3", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"] + ) + tm.assert_index_equal(renamed.index, new_index) + tm.assert_index_equal(renamed.columns, new_columns) + assert renamed.index.names == df.index.names + assert renamed.columns.names == df.columns.names + + # + # with specifying a level (GH13766) + + # dict + new_columns = MultiIndex.from_tuples( + [("fizz3", "buzz1"), ("fizz2", "buzz2")], names=["fizz", "buzz"] + ) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=0) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="fizz") + tm.assert_index_equal(renamed.columns, new_columns) + + new_columns = MultiIndex.from_tuples( + [("fizz1", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"] + ) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=1) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="buzz") + tm.assert_index_equal(renamed.columns, new_columns) + + # function + func = str.upper + new_columns = MultiIndex.from_tuples( + [("FIZZ1", "buzz1"), ("FIZZ2", "buzz2")], names=["fizz", "buzz"] + ) + renamed = df.rename(columns=func, level=0) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns=func, level="fizz") + tm.assert_index_equal(renamed.columns, new_columns) + + new_columns = MultiIndex.from_tuples( + [("fizz1", "BUZZ1"), ("fizz2", "BUZZ2")], names=["fizz", "buzz"] + ) + renamed = 
df.rename(columns=func, level=1) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns=func, level="buzz") + tm.assert_index_equal(renamed.columns, new_columns) + + # index + new_index = MultiIndex.from_tuples( + [("foo3", "bar1"), ("foo2", "bar2")], names=["foo", "bar"] + ) + renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0) + tm.assert_index_equal(renamed.index, new_index) + + def test_rename_nocopy(self, float_frame): + renamed = float_frame.rename(columns={"C": "foo"}, copy=False) + renamed["foo"] = 1.0 + assert (float_frame["C"] == 1.0).all() + + def test_rename_inplace(self, float_frame): + float_frame.rename(columns={"C": "foo"}) + assert "C" in float_frame + assert "foo" not in float_frame + + c_id = id(float_frame["C"]) + float_frame = float_frame.copy() + float_frame.rename(columns={"C": "foo"}, inplace=True) + + assert "C" not in float_frame + assert "foo" in float_frame + assert id(float_frame["foo"]) != c_id + + def test_rename_bug(self): + # GH 5344 + # rename set ref_locs, and set_index was not resetting + df = DataFrame({0: ["foo", "bar"], 1: ["bah", "bas"], 2: [1, 2]}) + df = df.rename(columns={0: "a"}) + df = df.rename(columns={1: "b"}) + df = df.set_index(["a", "b"]) + df.columns = ["2001-01-01"] + expected = DataFrame( + [[1], [2]], + index=MultiIndex.from_tuples( + [("foo", "bah"), ("bar", "bas")], names=["a", "b"] + ), + columns=["2001-01-01"], + ) + tm.assert_frame_equal(df, expected) + + def test_rename_bug2(self): + # GH 19497 + # rename was changing Index to MultiIndex if Index contained tuples + + df = DataFrame(data=np.arange(3), index=[(0, 0), (1, 1), (2, 2)], columns=["a"]) + df = df.rename({(1, 1): (5, 4)}, axis="index") + expected = DataFrame( + data=np.arange(3), index=[(0, 0), (5, 4), (2, 2)], columns=["a"] + ) + tm.assert_frame_equal(df, expected) + + def test_rename_errors_raises(self): + df = DataFrame(columns=["A", "B", "C", "D"]) + with pytest.raises(KeyError, match="'E'] not found in axis"): + df.rename(columns={"A": "a", "E": "e"}, errors="raise") + + @pytest.mark.parametrize( + "mapper, errors, expected_columns", + [ + ({"A": "a", "E": "e"}, "ignore", ["a", "B", "C", "D"]), + ({"A": "a"}, "raise", ["a", "B", "C", "D"]), + (str.lower, "raise", ["a", "b", "c", "d"]), + ], + ) + def test_rename_errors(self, mapper, errors, expected_columns): + # GH 13473 + # rename now works with errors parameter + df = DataFrame(columns=["A", "B", "C", "D"]) + result = df.rename(columns=mapper, errors=errors) + expected = DataFrame(columns=expected_columns) + tm.assert_frame_equal(result, expected) + + def test_rename_objects(self, float_string_frame): + renamed = float_string_frame.rename(columns=str.upper) + + assert "FOO" in renamed + assert "foo" not in renamed + + def test_rename_axis_style(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["X", "Y"]) + expected = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"]) + + result = df.rename(str.lower, axis=1) + tm.assert_frame_equal(result, expected) + + result = df.rename(str.lower, axis="columns") + tm.assert_frame_equal(result, expected) + + result = df.rename({"A": "a", "B": "b"}, axis=1) + tm.assert_frame_equal(result, expected) + + result = df.rename({"A": "a", "B": "b"}, axis="columns") + tm.assert_frame_equal(result, expected) + + # Index + expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"]) + result = df.rename(str.lower, axis=0) + tm.assert_frame_equal(result, expected) 
+ + result = df.rename(str.lower, axis="index") + tm.assert_frame_equal(result, expected) + + result = df.rename({"X": "x", "Y": "y"}, axis=0) + tm.assert_frame_equal(result, expected) + + result = df.rename({"X": "x", "Y": "y"}, axis="index") + tm.assert_frame_equal(result, expected) + + result = df.rename(mapper=str.lower, axis="index") + tm.assert_frame_equal(result, expected) + + def test_rename_mapper_multi(self): + df = DataFrame({"A": ["a", "b"], "B": ["c", "d"], "C": [1, 2]}).set_index( + ["A", "B"] + ) + result = df.rename(str.upper) + expected = df.rename(index=str.upper) + tm.assert_frame_equal(result, expected) + + def test_rename_positional_named(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"]) + result = df.rename(index=str.lower, columns=str.upper) + expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"]) + tm.assert_frame_equal(result, expected) + + def test_rename_axis_style_raises(self): + # see gh-12392 + df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["0", "1"]) + + # Named target and axis + over_spec_msg = "Cannot specify both 'axis' and any of 'index' or 'columns'" + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(index=str.lower, axis=1) + + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(index=str.lower, axis="columns") + + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(columns=str.lower, axis="columns") + + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(index=str.lower, axis=0) + + # Multiple targets and axis + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(str.lower, index=str.lower, axis="columns") + + # Too many targets + over_spec_msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'" + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(str.lower, index=str.lower, columns=str.lower) + + # Duplicates + with pytest.raises(TypeError, match="multiple values"): + df.rename(id, mapper=id) + + def test_rename_positional_raises(self): + # GH 29136 + df = DataFrame(columns=["A", "B"]) + msg = r"rename\(\) takes from 1 to 2 positional arguments" + + with pytest.raises(TypeError, match=msg): + df.rename(None, str.lower) + + def test_rename_no_mappings_raises(self): + # GH 29136 + df = DataFrame([[1]]) + msg = "must pass an index to rename" + with pytest.raises(TypeError, match=msg): + df.rename() + + with pytest.raises(TypeError, match=msg): + df.rename(None, index=None) + + with pytest.raises(TypeError, match=msg): + df.rename(None, columns=None) + + with pytest.raises(TypeError, match=msg): + df.rename(None, columns=None, index=None) + + def test_rename_mapper_and_positional_arguments_raises(self): + # GH 29136 + df = DataFrame([[1]]) + msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'" + with pytest.raises(TypeError, match=msg): + df.rename({}, index={}) + + with pytest.raises(TypeError, match=msg): + df.rename({}, columns={}) + + with pytest.raises(TypeError, match=msg): + df.rename({}, columns={}, index={}) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py new file mode 100644 index 0000000000000..6586c19af2539 --- /dev/null +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -0,0 +1,299 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + IntervalIndex, + MultiIndex, + RangeIndex, + Series, + Timestamp, + date_range, +) 
+import pandas._testing as tm + + +class TestResetIndex: + def test_reset_index_tz(self, tz_aware_fixture): + # GH 3950 + # reset_index with single level + tz = tz_aware_fixture + idx = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx") + df = DataFrame({"a": range(5), "b": ["A", "B", "C", "D", "E"]}, index=idx) + + expected = DataFrame( + { + "idx": [ + datetime(2011, 1, 1), + datetime(2011, 1, 2), + datetime(2011, 1, 3), + datetime(2011, 1, 4), + datetime(2011, 1, 5), + ], + "a": range(5), + "b": ["A", "B", "C", "D", "E"], + }, + columns=["idx", "a", "b"], + ) + expected["idx"] = expected["idx"].apply(lambda d: Timestamp(d, tz=tz)) + tm.assert_frame_equal(df.reset_index(), expected) + + def test_reset_index_with_intervals(self): + idx = IntervalIndex.from_breaks(np.arange(11), name="x") + original = DataFrame({"x": idx, "y": np.arange(10)})[["x", "y"]] + + result = original.set_index("x") + expected = DataFrame({"y": np.arange(10)}, index=idx) + tm.assert_frame_equal(result, expected) + + result2 = result.reset_index() + tm.assert_frame_equal(result2, original) + + def test_reset_index(self, float_frame): + stacked = float_frame.stack()[::2] + stacked = DataFrame({"foo": stacked, "bar": stacked}) + + names = ["first", "second"] + stacked.index.names = names + deleveled = stacked.reset_index() + for i, (lev, level_codes) in enumerate( + zip(stacked.index.levels, stacked.index.codes) + ): + values = lev.take(level_codes) + name = names[i] + tm.assert_index_equal(values, Index(deleveled[name])) + + stacked.index.names = [None, None] + deleveled2 = stacked.reset_index() + tm.assert_series_equal( + deleveled["first"], deleveled2["level_0"], check_names=False + ) + tm.assert_series_equal( + deleveled["second"], deleveled2["level_1"], check_names=False + ) + + # default name assigned + rdf = float_frame.reset_index() + exp = Series(float_frame.index.values, name="index") + tm.assert_series_equal(rdf["index"], exp) + + # default name assigned, corner case + df = float_frame.copy() + df["index"] = "foo" + rdf = df.reset_index() + exp = Series(float_frame.index.values, name="level_0") + tm.assert_series_equal(rdf["level_0"], exp) + + # but this is ok + float_frame.index.name = "index" + deleveled = float_frame.reset_index() + tm.assert_series_equal(deleveled["index"], Series(float_frame.index)) + tm.assert_index_equal(deleveled.index, Index(np.arange(len(deleveled)))) + + # preserve column names + float_frame.columns.name = "columns" + resetted = float_frame.reset_index() + assert resetted.columns.name == "columns" + + # only remove certain columns + df = float_frame.reset_index().set_index(["index", "A", "B"]) + rs = df.reset_index(["A", "B"]) + + # TODO should reset_index check_names ? 
+ tm.assert_frame_equal(rs, float_frame, check_names=False) + + rs = df.reset_index(["index", "A", "B"]) + tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False) + + rs = df.reset_index(["index", "A", "B"]) + tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False) + + rs = df.reset_index("A") + xp = float_frame.reset_index().set_index(["index", "B"]) + tm.assert_frame_equal(rs, xp, check_names=False) + + # test resetting in place + df = float_frame.copy() + resetted = float_frame.reset_index() + df.reset_index(inplace=True) + tm.assert_frame_equal(df, resetted, check_names=False) + + df = float_frame.reset_index().set_index(["index", "A", "B"]) + rs = df.reset_index("A", drop=True) + xp = float_frame.copy() + del xp["A"] + xp = xp.set_index(["B"], append=True) + tm.assert_frame_equal(rs, xp, check_names=False) + + def test_reset_index_name(self): + df = DataFrame( + [[1, 2, 3, 4], [5, 6, 7, 8]], + columns=["A", "B", "C", "D"], + index=Index(range(2), name="x"), + ) + assert df.reset_index().index.name is None + assert df.reset_index(drop=True).index.name is None + df.reset_index(inplace=True) + assert df.index.name is None + + def test_reset_index_level(self): + df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "C", "D"]) + + for levels in ["A", "B"], [0, 1]: + # With MultiIndex + result = df.set_index(["A", "B"]).reset_index(level=levels[0]) + tm.assert_frame_equal(result, df.set_index("B")) + + result = df.set_index(["A", "B"]).reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df.set_index("B")) + + result = df.set_index(["A", "B"]).reset_index(level=levels) + tm.assert_frame_equal(result, df) + + result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True) + tm.assert_frame_equal(result, df[["C", "D"]]) + + # With single-level Index (GH 16263) + result = df.set_index("A").reset_index(level=levels[0]) + tm.assert_frame_equal(result, df) + + result = df.set_index("A").reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df) + + result = df.set_index(["A"]).reset_index(level=levels[0], drop=True) + tm.assert_frame_equal(result, df[["B", "C", "D"]]) + + # Missing levels - for both MultiIndex and single-level Index: + for idx_lev in ["A", "B"], ["A"]: + with pytest.raises(KeyError, match=r"(L|l)evel \(?E\)?"): + df.set_index(idx_lev).reset_index(level=["A", "E"]) + with pytest.raises(IndexError, match="Too many levels"): + df.set_index(idx_lev).reset_index(level=[0, 1, 2]) + + def test_reset_index_right_dtype(self): + time = np.arange(0.0, 10, np.sqrt(2) / 2) + s1 = Series( + (9.81 * time ** 2) / 2, index=Index(time, name="time"), name="speed" + ) + df = DataFrame(s1) + + resetted = s1.reset_index() + assert resetted["time"].dtype == np.float64 + + resetted = df.reset_index() + assert resetted["time"].dtype == np.float64 + + def test_reset_index_multiindex_col(self): + vals = np.random.randn(3, 3).astype(object) + idx = ["x", "y", "z"] + full = np.hstack(([[x] for x in idx], vals)) + df = DataFrame( + vals, + Index(idx, name="a"), + columns=[["b", "b", "c"], ["mean", "median", "mean"]], + ) + rs = df.reset_index() + xp = DataFrame( + full, columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]] + ) + tm.assert_frame_equal(rs, xp) + + rs = df.reset_index(col_fill=None) + xp = DataFrame( + full, columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]] + ) + tm.assert_frame_equal(rs, xp) + + rs = df.reset_index(col_level=1, col_fill="blah") + xp = DataFrame( + full, columns=[["blah", "b", "b", "c"], 
["a", "mean", "median", "mean"]] + ) + tm.assert_frame_equal(rs, xp) + + df = DataFrame( + vals, + MultiIndex.from_arrays([[0, 1, 2], ["x", "y", "z"]], names=["d", "a"]), + columns=[["b", "b", "c"], ["mean", "median", "mean"]], + ) + rs = df.reset_index("a") + xp = DataFrame( + full, + Index([0, 1, 2], name="d"), + columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]], + ) + tm.assert_frame_equal(rs, xp) + + rs = df.reset_index("a", col_fill=None) + xp = DataFrame( + full, + Index(range(3), name="d"), + columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]], + ) + tm.assert_frame_equal(rs, xp) + + rs = df.reset_index("a", col_fill="blah", col_level=1) + xp = DataFrame( + full, + Index(range(3), name="d"), + columns=[["blah", "b", "b", "c"], ["a", "mean", "median", "mean"]], + ) + tm.assert_frame_equal(rs, xp) + + def test_reset_index_multiindex_nan(self): + # GH#6322, testing reset_index on MultiIndexes + # when we have a nan or all nan + df = DataFrame( + {"A": ["a", "b", "c"], "B": [0, 1, np.nan], "C": np.random.rand(3)} + ) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + df = DataFrame( + {"A": [np.nan, "b", "c"], "B": [0, 1, 2], "C": np.random.rand(3)} + ) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + df = DataFrame({"A": ["a", "b", "c"], "B": [0, 1, 2], "C": [np.nan, 1.1, 2.2]}) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + df = DataFrame( + { + "A": ["a", "b", "c"], + "B": [np.nan, np.nan, np.nan], + "C": np.random.rand(3), + } + ) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + def test_reset_index_with_datetimeindex_cols(self): + # GH#5818 + df = DataFrame( + [[1, 2], [3, 4]], + columns=date_range("1/1/2013", "1/2/2013"), + index=["A", "B"], + ) + + result = df.reset_index() + expected = DataFrame( + [["A", 1, 2], ["B", 3, 4]], + columns=["index", datetime(2013, 1, 1), datetime(2013, 1, 2)], + ) + tm.assert_frame_equal(result, expected) + + def test_reset_index_range(self): + # GH#12071 + df = DataFrame([[0, 0], [1, 1]], columns=["A", "B"], index=RangeIndex(stop=2)) + result = df.reset_index() + assert isinstance(result.index, RangeIndex) + expected = DataFrame( + [[0, 0, 0], [1, 1, 1]], + columns=["index", "A", "B"], + index=RangeIndex(stop=2), + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 0c19a38bb5fa2..751ed1dfdd847 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -1,4 +1,3 @@ -from collections import ChainMap from datetime import datetime, timedelta import inspect @@ -18,7 +17,6 @@ Index, IntervalIndex, MultiIndex, - RangeIndex, Series, Timestamp, cut, @@ -533,30 +531,6 @@ def test_convert_dti_to_series(self): df.pop("ts") tm.assert_frame_equal(df, expected) - def test_reset_index_tz(self, tz_aware_fixture): - # GH 3950 - # reset_index with single level - tz = tz_aware_fixture - idx = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx") - df = DataFrame({"a": range(5), "b": ["A", "B", "C", "D", "E"]}, index=idx) - - expected = DataFrame( - { - "idx": [ - datetime(2011, 1, 1), - datetime(2011, 1, 2), - datetime(2011, 1, 3), - datetime(2011, 1, 4), - datetime(2011, 1, 5), - ], - "a": range(5), - "b": ["A", "B", "C", "D", "E"], - }, - columns=["idx", "a", "b"], - ) - expected["idx"] = expected["idx"].apply(lambda d: Timestamp(d, tz=tz)) - 
tm.assert_frame_equal(df.reset_index(), expected) - def test_set_index_timezone(self): # GH 12358 # tz-aware Series should retain the tz @@ -583,17 +557,6 @@ def test_set_index_dst(self): exp = DataFrame({"b": [3, 4, 5]}, index=exp_index) tm.assert_frame_equal(res, exp) - def test_reset_index_with_intervals(self): - idx = IntervalIndex.from_breaks(np.arange(11), name="x") - original = DataFrame({"x": idx, "y": np.arange(10)})[["x", "y"]] - - result = original.set_index("x") - expected = DataFrame({"y": np.arange(10)}, index=idx) - tm.assert_frame_equal(result, expected) - - result2 = result.reset_index() - tm.assert_frame_equal(result2, original) - def test_set_index_multiindexcolumns(self): columns = MultiIndex.from_tuples([("foo", 1), ("foo", 2), ("bar", 1)]) df = DataFrame(np.random.randn(3, 3), columns=columns) @@ -652,65 +615,6 @@ def test_dti_set_index_reindex(self): # Renaming - def test_rename(self, float_frame): - mapping = {"A": "a", "B": "b", "C": "c", "D": "d"} - - renamed = float_frame.rename(columns=mapping) - renamed2 = float_frame.rename(columns=str.lower) - - tm.assert_frame_equal(renamed, renamed2) - tm.assert_frame_equal( - renamed2.rename(columns=str.upper), float_frame, check_names=False - ) - - # index - data = {"A": {"foo": 0, "bar": 1}} - - # gets sorted alphabetical - df = DataFrame(data) - renamed = df.rename(index={"foo": "bar", "bar": "foo"}) - tm.assert_index_equal(renamed.index, Index(["foo", "bar"])) - - renamed = df.rename(index=str.upper) - tm.assert_index_equal(renamed.index, Index(["BAR", "FOO"])) - - # have to pass something - with pytest.raises(TypeError, match="must pass an index to rename"): - float_frame.rename() - - # partial columns - renamed = float_frame.rename(columns={"C": "foo", "D": "bar"}) - tm.assert_index_equal(renamed.columns, Index(["A", "B", "foo", "bar"])) - - # other axis - renamed = float_frame.T.rename(index={"C": "foo", "D": "bar"}) - tm.assert_index_equal(renamed.index, Index(["A", "B", "foo", "bar"])) - - # index with name - index = Index(["foo", "bar"], name="name") - renamer = DataFrame(data, index=index) - renamed = renamer.rename(index={"foo": "bar", "bar": "foo"}) - tm.assert_index_equal(renamed.index, Index(["bar", "foo"], name="name")) - assert renamed.index.name == renamer.index.name - - @pytest.mark.parametrize( - "args,kwargs", - [ - ((ChainMap({"A": "a"}, {"B": "b"}),), dict(axis="columns")), - ((), dict(columns=ChainMap({"A": "a"}, {"B": "b"}))), - ], - ) - def test_rename_chainmap(self, args, kwargs): - # see gh-23859 - colAData = range(1, 11) - colBdata = np.random.randn(10) - - df = DataFrame({"A": colAData, "B": colBdata}) - result = df.rename(*args, **kwargs) - - expected = DataFrame({"a": colAData, "b": colBdata}) - tm.assert_frame_equal(result, expected) - def test_rename_axis_inplace(self, float_frame): # GH 15704 expected = float_frame.rename_axis("foo") @@ -785,168 +689,6 @@ def test_rename_axis_mapper(self): with pytest.raises(TypeError, match="bogus"): df.rename_axis(bogus=None) - @pytest.mark.parametrize( - "kwargs, rename_index, rename_columns", - [ - ({"mapper": None, "axis": 0}, True, False), - ({"mapper": None, "axis": 1}, False, True), - ({"index": None}, True, False), - ({"columns": None}, False, True), - ({"index": None, "columns": None}, True, True), - ({}, False, False), - ], - ) - def test_rename_axis_none(self, kwargs, rename_index, rename_columns): - # GH 25034 - index = Index(list("abc"), name="foo") - columns = Index(["col1", "col2"], name="bar") - data = np.arange(6).reshape(3, 2) - df = 
DataFrame(data, index, columns) - - result = df.rename_axis(**kwargs) - expected_index = index.rename(None) if rename_index else index - expected_columns = columns.rename(None) if rename_columns else columns - expected = DataFrame(data, expected_index, expected_columns) - tm.assert_frame_equal(result, expected) - - def test_rename_multiindex(self): - - tuples_index = [("foo1", "bar1"), ("foo2", "bar2")] - tuples_columns = [("fizz1", "buzz1"), ("fizz2", "buzz2")] - index = MultiIndex.from_tuples(tuples_index, names=["foo", "bar"]) - columns = MultiIndex.from_tuples(tuples_columns, names=["fizz", "buzz"]) - df = DataFrame([(0, 0), (1, 1)], index=index, columns=columns) - - # - # without specifying level -> across all levels - - renamed = df.rename( - index={"foo1": "foo3", "bar2": "bar3"}, - columns={"fizz1": "fizz3", "buzz2": "buzz3"}, - ) - new_index = MultiIndex.from_tuples( - [("foo3", "bar1"), ("foo2", "bar3")], names=["foo", "bar"] - ) - new_columns = MultiIndex.from_tuples( - [("fizz3", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"] - ) - tm.assert_index_equal(renamed.index, new_index) - tm.assert_index_equal(renamed.columns, new_columns) - assert renamed.index.names == df.index.names - assert renamed.columns.names == df.columns.names - - # - # with specifying a level (GH13766) - - # dict - new_columns = MultiIndex.from_tuples( - [("fizz3", "buzz1"), ("fizz2", "buzz2")], names=["fizz", "buzz"] - ) - renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=0) - tm.assert_index_equal(renamed.columns, new_columns) - renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="fizz") - tm.assert_index_equal(renamed.columns, new_columns) - - new_columns = MultiIndex.from_tuples( - [("fizz1", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"] - ) - renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=1) - tm.assert_index_equal(renamed.columns, new_columns) - renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="buzz") - tm.assert_index_equal(renamed.columns, new_columns) - - # function - func = str.upper - new_columns = MultiIndex.from_tuples( - [("FIZZ1", "buzz1"), ("FIZZ2", "buzz2")], names=["fizz", "buzz"] - ) - renamed = df.rename(columns=func, level=0) - tm.assert_index_equal(renamed.columns, new_columns) - renamed = df.rename(columns=func, level="fizz") - tm.assert_index_equal(renamed.columns, new_columns) - - new_columns = MultiIndex.from_tuples( - [("fizz1", "BUZZ1"), ("fizz2", "BUZZ2")], names=["fizz", "buzz"] - ) - renamed = df.rename(columns=func, level=1) - tm.assert_index_equal(renamed.columns, new_columns) - renamed = df.rename(columns=func, level="buzz") - tm.assert_index_equal(renamed.columns, new_columns) - - # index - new_index = MultiIndex.from_tuples( - [("foo3", "bar1"), ("foo2", "bar2")], names=["foo", "bar"] - ) - renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0) - tm.assert_index_equal(renamed.index, new_index) - - def test_rename_nocopy(self, float_frame): - renamed = float_frame.rename(columns={"C": "foo"}, copy=False) - renamed["foo"] = 1.0 - assert (float_frame["C"] == 1.0).all() - - def test_rename_inplace(self, float_frame): - float_frame.rename(columns={"C": "foo"}) - assert "C" in float_frame - assert "foo" not in float_frame - - c_id = id(float_frame["C"]) - float_frame = float_frame.copy() - float_frame.rename(columns={"C": "foo"}, inplace=True) - - assert "C" not in float_frame - assert "foo" in float_frame - assert id(float_frame["foo"]) != c_id - - def 
test_rename_bug(self): - # GH 5344 - # rename set ref_locs, and set_index was not resetting - df = DataFrame({0: ["foo", "bar"], 1: ["bah", "bas"], 2: [1, 2]}) - df = df.rename(columns={0: "a"}) - df = df.rename(columns={1: "b"}) - df = df.set_index(["a", "b"]) - df.columns = ["2001-01-01"] - expected = DataFrame( - [[1], [2]], - index=MultiIndex.from_tuples( - [("foo", "bah"), ("bar", "bas")], names=["a", "b"] - ), - columns=["2001-01-01"], - ) - tm.assert_frame_equal(df, expected) - - def test_rename_bug2(self): - # GH 19497 - # rename was changing Index to MultiIndex if Index contained tuples - - df = DataFrame(data=np.arange(3), index=[(0, 0), (1, 1), (2, 2)], columns=["a"]) - df = df.rename({(1, 1): (5, 4)}, axis="index") - expected = DataFrame( - data=np.arange(3), index=[(0, 0), (5, 4), (2, 2)], columns=["a"] - ) - tm.assert_frame_equal(df, expected) - - def test_rename_errors_raises(self): - df = DataFrame(columns=["A", "B", "C", "D"]) - with pytest.raises(KeyError, match="'E'] not found in axis"): - df.rename(columns={"A": "a", "E": "e"}, errors="raise") - - @pytest.mark.parametrize( - "mapper, errors, expected_columns", - [ - ({"A": "a", "E": "e"}, "ignore", ["a", "B", "C", "D"]), - ({"A": "a"}, "raise", ["a", "B", "C", "D"]), - (str.lower, "raise", ["a", "b", "c", "d"]), - ], - ) - def test_rename_errors(self, mapper, errors, expected_columns): - # GH 13473 - # rename now works with errors parameter - df = DataFrame(columns=["A", "B", "C", "D"]) - result = df.rename(columns=mapper, errors=errors) - expected = DataFrame(columns=expected_columns) - tm.assert_frame_equal(result, expected) - def test_reorder_levels(self): index = MultiIndex( levels=[["bar"], ["one", "two", "three"], [0, 1]], @@ -985,253 +727,6 @@ def test_reorder_levels(self): result = df.reorder_levels(["L0", "L0", "L0"]) tm.assert_frame_equal(result, expected) - def test_reset_index(self, float_frame): - stacked = float_frame.stack()[::2] - stacked = DataFrame({"foo": stacked, "bar": stacked}) - - names = ["first", "second"] - stacked.index.names = names - deleveled = stacked.reset_index() - for i, (lev, level_codes) in enumerate( - zip(stacked.index.levels, stacked.index.codes) - ): - values = lev.take(level_codes) - name = names[i] - tm.assert_index_equal(values, Index(deleveled[name])) - - stacked.index.names = [None, None] - deleveled2 = stacked.reset_index() - tm.assert_series_equal( - deleveled["first"], deleveled2["level_0"], check_names=False - ) - tm.assert_series_equal( - deleveled["second"], deleveled2["level_1"], check_names=False - ) - - # default name assigned - rdf = float_frame.reset_index() - exp = Series(float_frame.index.values, name="index") - tm.assert_series_equal(rdf["index"], exp) - - # default name assigned, corner case - df = float_frame.copy() - df["index"] = "foo" - rdf = df.reset_index() - exp = Series(float_frame.index.values, name="level_0") - tm.assert_series_equal(rdf["level_0"], exp) - - # but this is ok - float_frame.index.name = "index" - deleveled = float_frame.reset_index() - tm.assert_series_equal(deleveled["index"], Series(float_frame.index)) - tm.assert_index_equal(deleveled.index, Index(np.arange(len(deleveled)))) - - # preserve column names - float_frame.columns.name = "columns" - resetted = float_frame.reset_index() - assert resetted.columns.name == "columns" - - # only remove certain columns - df = float_frame.reset_index().set_index(["index", "A", "B"]) - rs = df.reset_index(["A", "B"]) - - # TODO should reset_index check_names ? 
- tm.assert_frame_equal(rs, float_frame, check_names=False) - - rs = df.reset_index(["index", "A", "B"]) - tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False) - - rs = df.reset_index(["index", "A", "B"]) - tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False) - - rs = df.reset_index("A") - xp = float_frame.reset_index().set_index(["index", "B"]) - tm.assert_frame_equal(rs, xp, check_names=False) - - # test resetting in place - df = float_frame.copy() - resetted = float_frame.reset_index() - df.reset_index(inplace=True) - tm.assert_frame_equal(df, resetted, check_names=False) - - df = float_frame.reset_index().set_index(["index", "A", "B"]) - rs = df.reset_index("A", drop=True) - xp = float_frame.copy() - del xp["A"] - xp = xp.set_index(["B"], append=True) - tm.assert_frame_equal(rs, xp, check_names=False) - - def test_reset_index_name(self): - df = DataFrame( - [[1, 2, 3, 4], [5, 6, 7, 8]], - columns=["A", "B", "C", "D"], - index=Index(range(2), name="x"), - ) - assert df.reset_index().index.name is None - assert df.reset_index(drop=True).index.name is None - df.reset_index(inplace=True) - assert df.index.name is None - - def test_reset_index_level(self): - df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "C", "D"]) - - for levels in ["A", "B"], [0, 1]: - # With MultiIndex - result = df.set_index(["A", "B"]).reset_index(level=levels[0]) - tm.assert_frame_equal(result, df.set_index("B")) - - result = df.set_index(["A", "B"]).reset_index(level=levels[:1]) - tm.assert_frame_equal(result, df.set_index("B")) - - result = df.set_index(["A", "B"]).reset_index(level=levels) - tm.assert_frame_equal(result, df) - - result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True) - tm.assert_frame_equal(result, df[["C", "D"]]) - - # With single-level Index (GH 16263) - result = df.set_index("A").reset_index(level=levels[0]) - tm.assert_frame_equal(result, df) - - result = df.set_index("A").reset_index(level=levels[:1]) - tm.assert_frame_equal(result, df) - - result = df.set_index(["A"]).reset_index(level=levels[0], drop=True) - tm.assert_frame_equal(result, df[["B", "C", "D"]]) - - # Missing levels - for both MultiIndex and single-level Index: - for idx_lev in ["A", "B"], ["A"]: - with pytest.raises(KeyError, match=r"(L|l)evel \(?E\)?"): - df.set_index(idx_lev).reset_index(level=["A", "E"]) - with pytest.raises(IndexError, match="Too many levels"): - df.set_index(idx_lev).reset_index(level=[0, 1, 2]) - - def test_reset_index_right_dtype(self): - time = np.arange(0.0, 10, np.sqrt(2) / 2) - s1 = Series( - (9.81 * time ** 2) / 2, index=Index(time, name="time"), name="speed" - ) - df = DataFrame(s1) - - resetted = s1.reset_index() - assert resetted["time"].dtype == np.float64 - - resetted = df.reset_index() - assert resetted["time"].dtype == np.float64 - - def test_reset_index_multiindex_col(self): - vals = np.random.randn(3, 3).astype(object) - idx = ["x", "y", "z"] - full = np.hstack(([[x] for x in idx], vals)) - df = DataFrame( - vals, - Index(idx, name="a"), - columns=[["b", "b", "c"], ["mean", "median", "mean"]], - ) - rs = df.reset_index() - xp = DataFrame( - full, columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]] - ) - tm.assert_frame_equal(rs, xp) - - rs = df.reset_index(col_fill=None) - xp = DataFrame( - full, columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]] - ) - tm.assert_frame_equal(rs, xp) - - rs = df.reset_index(col_level=1, col_fill="blah") - xp = DataFrame( - full, columns=[["blah", "b", "b", "c"], 
["a", "mean", "median", "mean"]] - ) - tm.assert_frame_equal(rs, xp) - - df = DataFrame( - vals, - MultiIndex.from_arrays([[0, 1, 2], ["x", "y", "z"]], names=["d", "a"]), - columns=[["b", "b", "c"], ["mean", "median", "mean"]], - ) - rs = df.reset_index("a") - xp = DataFrame( - full, - Index([0, 1, 2], name="d"), - columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]], - ) - tm.assert_frame_equal(rs, xp) - - rs = df.reset_index("a", col_fill=None) - xp = DataFrame( - full, - Index(range(3), name="d"), - columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]], - ) - tm.assert_frame_equal(rs, xp) - - rs = df.reset_index("a", col_fill="blah", col_level=1) - xp = DataFrame( - full, - Index(range(3), name="d"), - columns=[["blah", "b", "b", "c"], ["a", "mean", "median", "mean"]], - ) - tm.assert_frame_equal(rs, xp) - - def test_reset_index_multiindex_nan(self): - # GH6322, testing reset_index on MultiIndexes - # when we have a nan or all nan - df = DataFrame( - {"A": ["a", "b", "c"], "B": [0, 1, np.nan], "C": np.random.rand(3)} - ) - rs = df.set_index(["A", "B"]).reset_index() - tm.assert_frame_equal(rs, df) - - df = DataFrame( - {"A": [np.nan, "b", "c"], "B": [0, 1, 2], "C": np.random.rand(3)} - ) - rs = df.set_index(["A", "B"]).reset_index() - tm.assert_frame_equal(rs, df) - - df = DataFrame({"A": ["a", "b", "c"], "B": [0, 1, 2], "C": [np.nan, 1.1, 2.2]}) - rs = df.set_index(["A", "B"]).reset_index() - tm.assert_frame_equal(rs, df) - - df = DataFrame( - { - "A": ["a", "b", "c"], - "B": [np.nan, np.nan, np.nan], - "C": np.random.rand(3), - } - ) - rs = df.set_index(["A", "B"]).reset_index() - tm.assert_frame_equal(rs, df) - - def test_reset_index_with_datetimeindex_cols(self): - # GH5818 - # - df = DataFrame( - [[1, 2], [3, 4]], - columns=date_range("1/1/2013", "1/2/2013"), - index=["A", "B"], - ) - - result = df.reset_index() - expected = DataFrame( - [["A", 1, 2], ["B", 3, 4]], - columns=["index", datetime(2013, 1, 1), datetime(2013, 1, 2)], - ) - tm.assert_frame_equal(result, expected) - - def test_reset_index_range(self): - # GH 12071 - df = DataFrame([[0, 0], [1, 1]], columns=["A", "B"], index=RangeIndex(stop=2)) - result = df.reset_index() - assert isinstance(result.index, RangeIndex) - expected = DataFrame( - [[0, 0, 0], [1, 1, 1]], - columns=["index", "A", "B"], - index=RangeIndex(stop=2), - ) - tm.assert_frame_equal(result, expected) - def test_set_index_names(self): df = tm.makeDataFrame() df.index.name = "name" @@ -1262,92 +757,6 @@ def test_set_index_names(self): # Check equality tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2) - def test_rename_objects(self, float_string_frame): - renamed = float_string_frame.rename(columns=str.upper) - - assert "FOO" in renamed - assert "foo" not in renamed - - def test_rename_axis_style(self): - # https://github.com/pandas-dev/pandas/issues/12392 - df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["X", "Y"]) - expected = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"]) - - result = df.rename(str.lower, axis=1) - tm.assert_frame_equal(result, expected) - - result = df.rename(str.lower, axis="columns") - tm.assert_frame_equal(result, expected) - - result = df.rename({"A": "a", "B": "b"}, axis=1) - tm.assert_frame_equal(result, expected) - - result = df.rename({"A": "a", "B": "b"}, axis="columns") - tm.assert_frame_equal(result, expected) - - # Index - expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"]) - result = df.rename(str.lower, axis=0) - tm.assert_frame_equal(result, expected) - 
- result = df.rename(str.lower, axis="index") - tm.assert_frame_equal(result, expected) - - result = df.rename({"X": "x", "Y": "y"}, axis=0) - tm.assert_frame_equal(result, expected) - - result = df.rename({"X": "x", "Y": "y"}, axis="index") - tm.assert_frame_equal(result, expected) - - result = df.rename(mapper=str.lower, axis="index") - tm.assert_frame_equal(result, expected) - - def test_rename_mapper_multi(self): - df = DataFrame({"A": ["a", "b"], "B": ["c", "d"], "C": [1, 2]}).set_index( - ["A", "B"] - ) - result = df.rename(str.upper) - expected = df.rename(index=str.upper) - tm.assert_frame_equal(result, expected) - - def test_rename_positional_named(self): - # https://github.com/pandas-dev/pandas/issues/12392 - df = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"]) - result = df.rename(index=str.lower, columns=str.upper) - expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"]) - tm.assert_frame_equal(result, expected) - - def test_rename_axis_style_raises(self): - # see gh-12392 - df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["0", "1"]) - - # Named target and axis - over_spec_msg = "Cannot specify both 'axis' and any of 'index' or 'columns'" - with pytest.raises(TypeError, match=over_spec_msg): - df.rename(index=str.lower, axis=1) - - with pytest.raises(TypeError, match=over_spec_msg): - df.rename(index=str.lower, axis="columns") - - with pytest.raises(TypeError, match=over_spec_msg): - df.rename(columns=str.lower, axis="columns") - - with pytest.raises(TypeError, match=over_spec_msg): - df.rename(index=str.lower, axis=0) - - # Multiple targets and axis - with pytest.raises(TypeError, match=over_spec_msg): - df.rename(str.lower, index=str.lower, axis="columns") - - # Too many targets - over_spec_msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'" - with pytest.raises(TypeError, match=over_spec_msg): - df.rename(str.lower, index=str.lower, columns=str.lower) - - # Duplicates - with pytest.raises(TypeError, match="multiple values"): - df.rename(id, mapper=id) - def test_reindex_api_equivalence(self): # equivalence of the labels/axis and index/columns API's df = DataFrame( @@ -1376,43 +785,6 @@ def test_reindex_api_equivalence(self): for res in [res2, res3]: tm.assert_frame_equal(res1, res) - def test_rename_positional_raises(self): - # GH 29136 - df = DataFrame(columns=["A", "B"]) - msg = r"rename\(\) takes from 1 to 2 positional arguments" - - with pytest.raises(TypeError, match=msg): - df.rename(None, str.lower) - - def test_rename_no_mappings_raises(self): - # GH 29136 - df = DataFrame([[1]]) - msg = "must pass an index to rename" - with pytest.raises(TypeError, match=msg): - df.rename() - - with pytest.raises(TypeError, match=msg): - df.rename(None, index=None) - - with pytest.raises(TypeError, match=msg): - df.rename(None, columns=None) - - with pytest.raises(TypeError, match=msg): - df.rename(None, columns=None, index=None) - - def test_rename_mapper_and_positional_arguments_raises(self): - # GH 29136 - df = DataFrame([[1]]) - msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'" - with pytest.raises(TypeError, match=msg): - df.rename({}, index={}) - - with pytest.raises(TypeError, match=msg): - df.rename({}, columns={}) - - with pytest.raises(TypeError, match=msg): - df.rename({}, columns={}, index={}) - def test_assign_columns(self, float_frame): float_frame["hi"] = "there" diff --git a/pandas/tests/series/methods/test_rename.py b/pandas/tests/series/methods/test_rename.py new file mode 100644 index 
0000000000000..60182f509e657
--- /dev/null
+++ b/pandas/tests/series/methods/test_rename.py
@@ -0,0 +1,91 @@
+from datetime import datetime
+
+import numpy as np
+
+from pandas import Index, Series
+import pandas._testing as tm
+
+
+class TestRename:
+    def test_rename(self, datetime_series):
+        ts = datetime_series
+        renamer = lambda x: x.strftime("%Y%m%d")
+        renamed = ts.rename(renamer)
+        assert renamed.index[0] == renamer(ts.index[0])
+
+        # dict
+        rename_dict = dict(zip(ts.index, renamed.index))
+        renamed2 = ts.rename(rename_dict)
+        tm.assert_series_equal(renamed, renamed2)
+
+        # partial dict
+        s = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64")
+        renamed = s.rename({"b": "foo", "d": "bar"})
+        tm.assert_index_equal(renamed.index, Index(["a", "foo", "c", "bar"]))
+
+        # index with name
+        renamer = Series(
+            np.arange(4), index=Index(["a", "b", "c", "d"], name="name"), dtype="int64"
+        )
+        renamed = renamer.rename({})
+        assert renamed.index.name == renamer.index.name
+
+    def test_rename_by_series(self):
+        s = Series(range(5), name="foo")
+        renamer = Series({1: 10, 2: 20})
+        result = s.rename(renamer)
+        expected = Series(range(5), index=[0, 10, 20, 3, 4], name="foo")
+        tm.assert_series_equal(result, expected)
+
+    def test_rename_set_name(self):
+        s = Series(range(4), index=list("abcd"))
+        for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
+            result = s.rename(name)
+            assert result.name == name
+            tm.assert_numpy_array_equal(result.index.values, s.index.values)
+            assert s.name is None
+
+    def test_rename_set_name_inplace(self):
+        s = Series(range(3), index=list("abc"))
+        for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
+            s.rename(name, inplace=True)
+            assert s.name == name
+
+            exp = np.array(["a", "b", "c"], dtype=np.object_)
+            tm.assert_numpy_array_equal(s.index.values, exp)
+
+    def test_rename_axis_supported(self):
+        # Supporting axis for compatibility, detailed in GH-18589
+        s = Series(range(5))
+        s.rename({}, axis=0)
+        s.rename({}, axis="index")
+        # FIXME: don't leave commented-out
+        # TODO: clean up shared index validation
+        # with pytest.raises(ValueError, match="No axis named 5"):
+        #     s.rename({}, axis=5)
+
+    def test_rename_inplace(self, datetime_series):
+        renamer = lambda x: x.strftime("%Y%m%d")
+        expected = renamer(datetime_series.index[0])
+
+        datetime_series.rename(renamer, inplace=True)
+        assert datetime_series.index[0] == expected
+
+    def test_rename_with_custom_indexer(self):
+        # GH 27814
+        class MyIndexer:
+            pass
+
+        ix = MyIndexer()
+        s = Series([1, 2, 3]).rename(ix)
+        assert s.name is ix
+
+    def test_rename_with_custom_indexer_inplace(self):
+        # GH 27814
+        class MyIndexer:
+            pass
+
+        ix = MyIndexer()
+        s = Series([1, 2, 3])
+        s.rename(ix, inplace=True)
+        assert s.name is ix
diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py
new file mode 100644
index 0000000000000..f0c4895ad7c10
--- /dev/null
+++ b/pandas/tests/series/methods/test_reset_index.py
@@ -0,0 +1,110 @@
+import numpy as np
+import pytest
+
+from pandas import DataFrame, Index, MultiIndex, RangeIndex, Series
+import pandas._testing as tm
+
+
+class TestResetIndex:
+    def test_reset_index(self):
+        df = tm.makeDataFrame()[:5]
+        ser = df.stack()
+        ser.index.names = ["hash", "category"]
+
+        ser.name = "value"
+        df = ser.reset_index()
+        assert "value" in df
+
+        df = ser.reset_index(name="value2")
+        assert "value2" in df
+
+        # check inplace
+        s = ser.reset_index(drop=True)
+        s2 = ser
+        s2.reset_index(drop=True,
inplace=True) + tm.assert_series_equal(s, s2) + + # level + index = MultiIndex( + levels=[["bar"], ["one", "two", "three"], [0, 1]], + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + ) + s = Series(np.random.randn(6), index=index) + rs = s.reset_index(level=1) + assert len(rs.columns) == 2 + + rs = s.reset_index(level=[0, 2], drop=True) + tm.assert_index_equal(rs.index, Index(index.get_level_values(1))) + assert isinstance(rs, Series) + + def test_reset_index_name(self): + s = Series([1, 2, 3], index=Index(range(3), name="x")) + assert s.reset_index().index.name is None + assert s.reset_index(drop=True).index.name is None + + def test_reset_index_level(self): + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"]) + + for levels in ["A", "B"], [0, 1]: + # With MultiIndex + s = df.set_index(["A", "B"])["C"] + + result = s.reset_index(level=levels[0]) + tm.assert_frame_equal(result, df.set_index("B")) + + result = s.reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df.set_index("B")) + + result = s.reset_index(level=levels) + tm.assert_frame_equal(result, df) + + result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True) + tm.assert_frame_equal(result, df[["C"]]) + + with pytest.raises(KeyError, match="Level E "): + s.reset_index(level=["A", "E"]) + + # With single-level Index + s = df.set_index("A")["B"] + + result = s.reset_index(level=levels[0]) + tm.assert_frame_equal(result, df[["A", "B"]]) + + result = s.reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df[["A", "B"]]) + + result = s.reset_index(level=levels[0], drop=True) + tm.assert_series_equal(result, df["B"]) + + with pytest.raises(IndexError, match="Too many levels"): + s.reset_index(level=[0, 1, 2]) + + # Check that .reset_index([],drop=True) doesn't fail + result = Series(range(4)).reset_index([], drop=True) + expected = Series(range(4)) + tm.assert_series_equal(result, expected) + + def test_reset_index_range(self): + # GH 12071 + s = Series(range(2), name="A", dtype="int64") + series_result = s.reset_index() + assert isinstance(series_result.index, RangeIndex) + series_expected = DataFrame( + [[0, 0], [1, 1]], columns=["index", "A"], index=RangeIndex(stop=2) + ) + tm.assert_frame_equal(series_result, series_expected) + + def test_reset_index_drop_errors(self): + # GH 20925 + + # KeyError raised for series index when passed level name is missing + s = Series(range(4)) + with pytest.raises(KeyError, match="does not match index name"): + s.reset_index("wrong", drop=True) + with pytest.raises(KeyError, match="does not match index name"): + s.reset_index("wrong") + + # KeyError raised for series when level to be dropped is missing + s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2)) + with pytest.raises(KeyError, match="not found"): + s.reset_index("wrong", drop=True) diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 71f6681e8c955..9be8744d7223f 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from pandas import DataFrame, Index, MultiIndex, RangeIndex, Series +from pandas import Index, MultiIndex, Series import pandas._testing as tm @@ -31,62 +31,6 @@ def test_setindex(self, string_series): # Renaming - def test_rename(self, datetime_series): - ts = datetime_series - renamer = lambda x: x.strftime("%Y%m%d") - renamed = ts.rename(renamer) - assert renamed.index[0] == renamer(ts.index[0]) - - # dict - 
rename_dict = dict(zip(ts.index, renamed.index)) - renamed2 = ts.rename(rename_dict) - tm.assert_series_equal(renamed, renamed2) - - # partial dict - s = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64") - renamed = s.rename({"b": "foo", "d": "bar"}) - tm.assert_index_equal(renamed.index, Index(["a", "foo", "c", "bar"])) - - # index with name - renamer = Series( - np.arange(4), index=Index(["a", "b", "c", "d"], name="name"), dtype="int64" - ) - renamed = renamer.rename({}) - assert renamed.index.name == renamer.index.name - - def test_rename_by_series(self): - s = Series(range(5), name="foo") - renamer = Series({1: 10, 2: 20}) - result = s.rename(renamer) - expected = Series(range(5), index=[0, 10, 20, 3, 4], name="foo") - tm.assert_series_equal(result, expected) - - def test_rename_set_name(self): - s = Series(range(4), index=list("abcd")) - for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]: - result = s.rename(name) - assert result.name == name - tm.assert_numpy_array_equal(result.index.values, s.index.values) - assert s.name is None - - def test_rename_set_name_inplace(self): - s = Series(range(3), index=list("abc")) - for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]: - s.rename(name, inplace=True) - assert s.name == name - - exp = np.array(["a", "b", "c"], dtype=np.object_) - tm.assert_numpy_array_equal(s.index.values, exp) - - def test_rename_axis_supported(self): - # Supporting axis for compatibility, detailed in GH-18589 - s = Series(range(5)) - s.rename({}, axis=0) - s.rename({}, axis="index") - # TODO: clean up shared index validation - # with pytest.raises(ValueError, match="No axis named 5"): - # s.rename({}, axis=5) - def test_set_name_attribute(self): s = Series([1, 2, 3]) s2 = Series([1, 2, 3], name="bar") @@ -103,13 +47,6 @@ def test_set_name(self): assert s.name is None assert s is not s2 - def test_rename_inplace(self, datetime_series): - renamer = lambda x: x.strftime("%Y%m%d") - expected = renamer(datetime_series.index[0]) - - datetime_series.rename(renamer, inplace=True) - assert datetime_series.index[0] == expected - def test_set_index_makes_timeseries(self): idx = tm.makeDateIndex(10) @@ -117,94 +54,6 @@ def test_set_index_makes_timeseries(self): s.index = idx assert s.index.is_all_dates - def test_reset_index(self): - df = tm.makeDataFrame()[:5] - ser = df.stack() - ser.index.names = ["hash", "category"] - - ser.name = "value" - df = ser.reset_index() - assert "value" in df - - df = ser.reset_index(name="value2") - assert "value2" in df - - # check inplace - s = ser.reset_index(drop=True) - s2 = ser - s2.reset_index(drop=True, inplace=True) - tm.assert_series_equal(s, s2) - - # level - index = MultiIndex( - levels=[["bar"], ["one", "two", "three"], [0, 1]], - codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], - ) - s = Series(np.random.randn(6), index=index) - rs = s.reset_index(level=1) - assert len(rs.columns) == 2 - - rs = s.reset_index(level=[0, 2], drop=True) - tm.assert_index_equal(rs.index, Index(index.get_level_values(1))) - assert isinstance(rs, Series) - - def test_reset_index_name(self): - s = Series([1, 2, 3], index=Index(range(3), name="x")) - assert s.reset_index().index.name is None - assert s.reset_index(drop=True).index.name is None - - def test_reset_index_level(self): - df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"]) - - for levels in ["A", "B"], [0, 1]: - # With MultiIndex - s = df.set_index(["A", "B"])["C"] - - result = s.reset_index(level=levels[0]) - 
tm.assert_frame_equal(result, df.set_index("B")) - - result = s.reset_index(level=levels[:1]) - tm.assert_frame_equal(result, df.set_index("B")) - - result = s.reset_index(level=levels) - tm.assert_frame_equal(result, df) - - result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True) - tm.assert_frame_equal(result, df[["C"]]) - - with pytest.raises(KeyError, match="Level E "): - s.reset_index(level=["A", "E"]) - - # With single-level Index - s = df.set_index("A")["B"] - - result = s.reset_index(level=levels[0]) - tm.assert_frame_equal(result, df[["A", "B"]]) - - result = s.reset_index(level=levels[:1]) - tm.assert_frame_equal(result, df[["A", "B"]]) - - result = s.reset_index(level=levels[0], drop=True) - tm.assert_series_equal(result, df["B"]) - - with pytest.raises(IndexError, match="Too many levels"): - s.reset_index(level=[0, 1, 2]) - - # Check that .reset_index([],drop=True) doesn't fail - result = Series(range(4)).reset_index([], drop=True) - expected = Series(range(4)) - tm.assert_series_equal(result, expected) - - def test_reset_index_range(self): - # GH 12071 - s = Series(range(2), name="A", dtype="int64") - series_result = s.reset_index() - assert isinstance(series_result.index, RangeIndex) - series_expected = DataFrame( - [[0, 0], [1, 1]], columns=["index", "A"], index=RangeIndex(stop=2) - ) - tm.assert_frame_equal(series_result, series_expected) - def test_reorder_levels(self): index = MultiIndex( levels=[["bar"], ["one", "two", "three"], [0, 1]], @@ -268,25 +117,6 @@ def test_rename_axis_none(self, kwargs): expected = Series([1, 2, 3], index=expected_index) tm.assert_series_equal(result, expected) - def test_rename_with_custom_indexer(self): - # GH 27814 - class MyIndexer: - pass - - ix = MyIndexer() - s = Series([1, 2, 3]).rename(ix) - assert s.name is ix - - def test_rename_with_custom_indexer_inplace(self): - # GH 27814 - class MyIndexer: - pass - - ix = MyIndexer() - s = Series([1, 2, 3]) - s.rename(ix, inplace=True) - assert s.name is ix - def test_set_axis_inplace_axes(self, axis_series): # GH14636 ser = Series(np.arange(4), index=[1, 3, 5, 7], dtype="int64") @@ -323,21 +153,6 @@ def test_set_axis_inplace(self): with pytest.raises(ValueError, match="No axis named"): s.set_axis(list("abcd"), axis=axis, inplace=False) - def test_reset_index_drop_errors(self): - # GH 20925 - - # KeyError raised for series index when passed level name is missing - s = Series(range(4)) - with pytest.raises(KeyError, match="does not match index name"): - s.reset_index("wrong", drop=True) - with pytest.raises(KeyError, match="does not match index name"): - s.reset_index("wrong") - - # KeyError raised for series when level to be dropped is missing - s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2)) - with pytest.raises(KeyError, match="not found"): - s.reset_index("wrong", drop=True) - def test_droplevel(self): # GH20342 ser = Series([1, 2, 3, 4]) From 1bcc368ed7bd955303ff7d9378b59f228a100aa2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Feb 2020 12:41:54 -0800 Subject: [PATCH 137/388] REF/TST: method-specific files for combine, update; parametrize (#32228) --- pandas/tests/series/methods/test_combine.py | 17 +++ pandas/tests/series/methods/test_update.py | 58 +++++++++ pandas/tests/series/test_combine_concat.py | 133 +++++--------------- 3 files changed, 105 insertions(+), 103 deletions(-) create mode 100644 pandas/tests/series/methods/test_combine.py create mode 100644 pandas/tests/series/methods/test_update.py diff --git 
a/pandas/tests/series/methods/test_combine.py b/pandas/tests/series/methods/test_combine.py
new file mode 100644
index 0000000000000..75d47e3daa103
--- /dev/null
+++ b/pandas/tests/series/methods/test_combine.py
@@ -0,0 +1,17 @@
+from pandas import Series
+import pandas._testing as tm
+
+
+class TestCombine:
+    def test_combine_scalar(self):
+        # GH#21248
+        # Note - combine() with another Series is tested elsewhere because
+        # it is used when testing operators
+        ser = Series([i * 10 for i in range(5)])
+        result = ser.combine(3, lambda x, y: x + y)
+        expected = Series([i * 10 + 3 for i in range(5)])
+        tm.assert_series_equal(result, expected)
+
+        result = ser.combine(22, lambda x, y: min(x, y))
+        expected = Series([min(i * 10, 22) for i in range(5)])
+        tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_update.py b/pandas/tests/series/methods/test_update.py
new file mode 100644
index 0000000000000..b7f5f33294792
--- /dev/null
+++ b/pandas/tests/series/methods/test_update.py
@@ -0,0 +1,58 @@
+import numpy as np
+import pytest
+
+from pandas import DataFrame, Series
+import pandas._testing as tm
+
+
+class TestUpdate:
+    def test_update(self):
+        s = Series([1.5, np.nan, 3.0, 4.0, np.nan])
+        s2 = Series([np.nan, 3.5, np.nan, 5.0])
+        s.update(s2)
+
+        expected = Series([1.5, 3.5, 3.0, 5.0, np.nan])
+        tm.assert_series_equal(s, expected)
+
+        # GH 3217
+        df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
+        df["c"] = np.nan
+
+        df["c"].update(Series(["foo"], index=[0]))
+        expected = DataFrame(
+            [[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"]
+        )
+        tm.assert_frame_equal(df, expected)
+
+    @pytest.mark.parametrize(
+        "other, dtype, expected",
+        [
+            # other is int
+            ([61, 63], "int32", Series([10, 61, 12], dtype="int32")),
+            ([61, 63], "int64", Series([10, 61, 12])),
+            ([61, 63], float, Series([10.0, 61.0, 12.0])),
+            ([61, 63], object, Series([10, 61, 12], dtype=object)),
+            # other is float, but can be cast to int
+            ([61.0, 63.0], "int32", Series([10, 61, 12], dtype="int32")),
+            ([61.0, 63.0], "int64", Series([10, 61, 12])),
+            ([61.0, 63.0], float, Series([10.0, 61.0, 12.0])),
+            ([61.0, 63.0], object, Series([10, 61.0, 12], dtype=object)),
+            # other is float, cannot be cast to int
+            ([61.1, 63.1], "int32", Series([10.0, 61.1, 12.0])),
+            ([61.1, 63.1], "int64", Series([10.0, 61.1, 12.0])),
+            ([61.1, 63.1], float, Series([10.0, 61.1, 12.0])),
+            ([61.1, 63.1], object, Series([10, 61.1, 12], dtype=object)),
+            # other is object, cannot be cast
+            ([(61,), (63,)], "int32", Series([10, (61,), 12])),
+            ([(61,), (63,)], "int64", Series([10, (61,), 12])),
+            ([(61,), (63,)], float, Series([10.0, (61,), 12.0])),
+            ([(61,), (63,)], object, Series([10, (61,), 12])),
+        ],
+    )
+    def test_update_dtypes(self, other, dtype, expected):
+
+        ser = Series([10, 11, 12], dtype=dtype)
+        other = Series(other, index=[1, 3])
+        ser.update(other)
+
+        tm.assert_series_equal(ser, expected)
diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py
index 4afa083e97c7c..adb79f69c2d81 100644
--- a/pandas/tests/series/test_combine_concat.py
+++ b/pandas/tests/series/test_combine_concat.py
@@ -2,84 +2,27 @@
 import pytest
 
 import pandas as pd
-from pandas import DataFrame, Series
-import pandas._testing as tm
+from pandas import Series
 
 
 class TestSeriesCombine:
-    def test_combine_scalar(self):
-        # GH 21248
-        # Note - combine() with another Series is tested elsewhere because
-        # it is used when testing operators
-        s = pd.Series([i * 10 for i in range(5)])
-
result = s.combine(3, lambda x, y: x + y) - expected = pd.Series([i * 10 + 3 for i in range(5)]) - tm.assert_series_equal(result, expected) - - result = s.combine(22, lambda x, y: min(x, y)) - expected = pd.Series([min(i * 10, 22) for i in range(5)]) - tm.assert_series_equal(result, expected) - - def test_update(self): - s = Series([1.5, np.nan, 3.0, 4.0, np.nan]) - s2 = Series([np.nan, 3.5, np.nan, 5.0]) - s.update(s2) - - expected = Series([1.5, 3.5, 3.0, 5.0, np.nan]) - tm.assert_series_equal(s, expected) - - # GH 3217 - df = DataFrame([{"a": 1}, {"a": 3, "b": 2}]) - df["c"] = np.nan - - df["c"].update(Series(["foo"], index=[0])) - expected = DataFrame( - [[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"] - ) - tm.assert_frame_equal(df, expected) - @pytest.mark.parametrize( - "other, dtype, expected", - [ - # other is int - ([61, 63], "int32", pd.Series([10, 61, 12], dtype="int32")), - ([61, 63], "int64", pd.Series([10, 61, 12])), - ([61, 63], float, pd.Series([10.0, 61.0, 12.0])), - ([61, 63], object, pd.Series([10, 61, 12], dtype=object)), - # other is float, but can be cast to int - ([61.0, 63.0], "int32", pd.Series([10, 61, 12], dtype="int32")), - ([61.0, 63.0], "int64", pd.Series([10, 61, 12])), - ([61.0, 63.0], float, pd.Series([10.0, 61.0, 12.0])), - ([61.0, 63.0], object, pd.Series([10, 61.0, 12], dtype=object)), - # others is float, cannot be cast to int - ([61.1, 63.1], "int32", pd.Series([10.0, 61.1, 12.0])), - ([61.1, 63.1], "int64", pd.Series([10.0, 61.1, 12.0])), - ([61.1, 63.1], float, pd.Series([10.0, 61.1, 12.0])), - ([61.1, 63.1], object, pd.Series([10, 61.1, 12], dtype=object)), - # other is object, cannot be cast - ([(61,), (63,)], "int32", pd.Series([10, (61,), 12])), - ([(61,), (63,)], "int64", pd.Series([10, (61,), 12])), - ([(61,), (63,)], float, pd.Series([10.0, (61,), 12.0])), - ([(61,), (63,)], object, pd.Series([10, (61,), 12])), - ], + "dtype", ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"] ) - def test_update_dtypes(self, other, dtype, expected): + def test_concat_empty_series_dtypes_match_roundtrips(self, dtype): + dtype = np.dtype(dtype) - s = Series([10, 11, 12], dtype=dtype) - other = Series(other, index=[1, 3]) - s.update(other) + result = pd.concat([Series(dtype=dtype)]) + assert result.dtype == dtype - tm.assert_series_equal(s, expected) + result = pd.concat([Series(dtype=dtype), Series(dtype=dtype)]) + assert result.dtype == dtype def test_concat_empty_series_dtypes_roundtrips(self): # round-tripping with self & like self dtypes = map(np.dtype, ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"]) - for dtype in dtypes: - assert pd.concat([Series(dtype=dtype)]).dtype == dtype - assert pd.concat([Series(dtype=dtype), Series(dtype=dtype)]).dtype == dtype - def int_result_type(dtype, dtype2): typs = {dtype.kind, dtype2.kind} if not len(typs - {"i", "u", "b"}) and ( @@ -118,35 +61,28 @@ def get_result_type(dtype, dtype2): result = pd.concat([Series(dtype=dtype), Series(dtype=dtype2)]).dtype assert result.kind == expected - def test_concat_empty_series_dtypes(self): + @pytest.mark.parametrize( + "left,right,expected", + [ + # booleans + (np.bool_, np.int32, np.int32), + (np.bool_, np.float32, np.object_), + # datetime-like + ("m8[ns]", np.bool, np.object_), + ("m8[ns]", np.int64, np.object_), + ("M8[ns]", np.bool, np.object_), + ("M8[ns]", np.int64, np.object_), + # categorical + ("category", "category", "category"), + ("category", "object", "object"), + ], + ) + def test_concat_empty_series_dtypes(self, left, right, expected): 
+ result = pd.concat([Series(dtype=left), Series(dtype=right)]) + assert result.dtype == expected - # booleans - assert ( - pd.concat([Series(dtype=np.bool_), Series(dtype=np.int32)]).dtype - == np.int32 - ) - assert ( - pd.concat([Series(dtype=np.bool_), Series(dtype=np.float32)]).dtype - == np.object_ - ) + def test_concat_empty_series_dtypes_triple(self): - # datetime-like - assert ( - pd.concat([Series(dtype="m8[ns]"), Series(dtype=np.bool)]).dtype - == np.object_ - ) - assert ( - pd.concat([Series(dtype="m8[ns]"), Series(dtype=np.int64)]).dtype - == np.object_ - ) - assert ( - pd.concat([Series(dtype="M8[ns]"), Series(dtype=np.bool)]).dtype - == np.object_ - ) - assert ( - pd.concat([Series(dtype="M8[ns]"), Series(dtype=np.int64)]).dtype - == np.object_ - ) assert ( pd.concat( [Series(dtype="M8[ns]"), Series(dtype=np.bool_), Series(dtype=np.int64)] @@ -154,11 +90,7 @@ def test_concat_empty_series_dtypes(self): == np.object_ ) - # categorical - assert ( - pd.concat([Series(dtype="category"), Series(dtype="category")]).dtype - == "category" - ) + def test_concat_empty_series_dtype_category_with_array(self): # GH 18515 assert ( pd.concat( @@ -166,13 +98,8 @@ def test_concat_empty_series_dtypes(self): ).dtype == "float64" ) - assert ( - pd.concat([Series(dtype="category"), Series(dtype="object")]).dtype - == "object" - ) - # sparse - # TODO: move? + def test_concat_empty_series_dtypes_sparse(self): result = pd.concat( [ Series(dtype="float64").astype("Sparse"), From 2227c831610bfcd75377e0e41f786030cef9c4a5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Feb 2020 13:49:52 -0800 Subject: [PATCH 138/388] REF/TST: method-specific files for Series timeseries methods (#32226) --- pandas/tests/series/methods/test_asfreq.py | 83 +++++- pandas/tests/series/methods/test_at_time.py | 72 +++++ pandas/tests/series/methods/test_between.py | 35 +++ .../tests/series/methods/test_between_time.py | 144 ++++++++++ pandas/tests/series/methods/test_truncate.py | 33 +++ .../tests/series/methods/test_tz_convert.py | 29 ++ .../tests/series/methods/test_tz_localize.py | 88 +++++++ pandas/tests/series/test_datetime_values.py | 13 - pandas/tests/series/test_period.py | 28 -- pandas/tests/series/test_timeseries.py | 247 +----------------- pandas/tests/series/test_timezones.py | 147 +---------- 11 files changed, 485 insertions(+), 434 deletions(-) create mode 100644 pandas/tests/series/methods/test_at_time.py create mode 100644 pandas/tests/series/methods/test_between.py create mode 100644 pandas/tests/series/methods/test_between_time.py create mode 100644 pandas/tests/series/methods/test_tz_convert.py create mode 100644 pandas/tests/series/methods/test_tz_localize.py diff --git a/pandas/tests/series/methods/test_asfreq.py b/pandas/tests/series/methods/test_asfreq.py index 05ec56cf02182..d94b60384a07c 100644 --- a/pandas/tests/series/methods/test_asfreq.py +++ b/pandas/tests/series/methods/test_asfreq.py @@ -1,8 +1,13 @@ +from datetime import datetime + import numpy as np +import pytest -from pandas import DataFrame, Series, period_range +from pandas import DataFrame, DatetimeIndex, Series, date_range, period_range import pandas._testing as tm +from pandas.tseries.offsets import BDay, BMonthEnd + class TestAsFreq: # TODO: de-duplicate/parametrize or move DataFrame test @@ -21,3 +26,79 @@ def test_asfreq_ts(self): result = ts.asfreq("D", how="start") assert len(result) == len(ts) tm.assert_index_equal(result.index, index.asfreq("D", how="start")) + + @pytest.mark.parametrize("tz", ["US/Eastern", 
"dateutil/US/Eastern"]) + def test_tz_aware_asfreq(self, tz): + dr = date_range("2011-12-01", "2012-07-20", freq="D", tz=tz) + + ser = Series(np.random.randn(len(dr)), index=dr) + + # it works! + ser.asfreq("T") + + def test_asfreq(self): + ts = Series( + [0.0, 1.0, 2.0], + index=[ + datetime(2009, 10, 30), + datetime(2009, 11, 30), + datetime(2009, 12, 31), + ], + ) + + daily_ts = ts.asfreq("B") + monthly_ts = daily_ts.asfreq("BM") + tm.assert_series_equal(monthly_ts, ts) + + daily_ts = ts.asfreq("B", method="pad") + monthly_ts = daily_ts.asfreq("BM") + tm.assert_series_equal(monthly_ts, ts) + + daily_ts = ts.asfreq(BDay()) + monthly_ts = daily_ts.asfreq(BMonthEnd()) + tm.assert_series_equal(monthly_ts, ts) + + result = ts[:0].asfreq("M") + assert len(result) == 0 + assert result is not ts + + daily_ts = ts.asfreq("D", fill_value=-1) + result = daily_ts.value_counts().sort_index() + expected = Series([60, 1, 1, 1], index=[-1.0, 2.0, 1.0, 0.0]).sort_index() + tm.assert_series_equal(result, expected) + + def test_asfreq_datetimeindex_empty_series(self): + # GH#14320 + index = DatetimeIndex(["2016-09-29 11:00"]) + expected = Series(index=index, dtype=object).asfreq("H") + result = Series([3], index=index.copy()).asfreq("H") + tm.assert_index_equal(expected.index, result.index) + + def test_asfreq_keep_index_name(self): + # GH#9854 + index_name = "bar" + index = date_range("20130101", periods=20, name=index_name) + df = DataFrame(list(range(20)), columns=["foo"], index=index) + + assert index_name == df.index.name + assert index_name == df.asfreq("10D").index.name + + def test_asfreq_normalize(self): + rng = date_range("1/1/2000 09:30", periods=20) + norm = date_range("1/1/2000", periods=20) + vals = np.random.randn(20) + ts = Series(vals, index=rng) + + result = ts.asfreq("D", normalize=True) + norm = date_range("1/1/2000", periods=20) + expected = Series(vals, index=norm) + + tm.assert_series_equal(result, expected) + + vals = np.random.randn(20, 3) + ts = DataFrame(vals, index=rng) + + result = ts.asfreq("D", normalize=True) + expected = DataFrame(vals, index=norm) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_at_time.py b/pandas/tests/series/methods/test_at_time.py new file mode 100644 index 0000000000000..d9985cf33776a --- /dev/null +++ b/pandas/tests/series/methods/test_at_time.py @@ -0,0 +1,72 @@ +from datetime import time + +import numpy as np +import pytest + +from pandas._libs.tslibs import timezones + +from pandas import DataFrame, Series, date_range +import pandas._testing as tm + + +class TestAtTime: + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_localized_at_time(self, tzstr): + tz = timezones.maybe_get_tz(tzstr) + + rng = date_range("4/16/2012", "5/1/2012", freq="H") + ts = Series(np.random.randn(len(rng)), index=rng) + + ts_local = ts.tz_localize(tzstr) + + result = ts_local.at_time(time(10, 0)) + expected = ts.at_time(time(10, 0)).tz_localize(tzstr) + tm.assert_series_equal(result, expected) + assert timezones.tz_compare(result.index.tz, tz) + + def test_at_time(self): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = Series(np.random.randn(len(rng)), index=rng) + rs = ts.at_time(rng[1]) + assert (rs.index.hour == rng[1].hour).all() + assert (rs.index.minute == rng[1].minute).all() + assert (rs.index.second == rng[1].second).all() + + result = ts.at_time("9:30") + expected = ts.at_time(time(9, 30)) + tm.assert_series_equal(result, expected) + + df = 
DataFrame(np.random.randn(len(rng), 3), index=rng) + + result = ts[time(9, 30)] + result_df = df.loc[time(9, 30)] + expected = ts[(rng.hour == 9) & (rng.minute == 30)] + exp_df = df[(rng.hour == 9) & (rng.minute == 30)] + + tm.assert_series_equal(result, expected) + tm.assert_frame_equal(result_df, exp_df) + + chunk = df.loc["1/4/2000":] + result = chunk.loc[time(9, 30)] + expected = result_df[-1:] + tm.assert_frame_equal(result, expected) + + # midnight, everything + rng = date_range("1/1/2000", "1/31/2000") + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.at_time(time(0, 0)) + tm.assert_series_equal(result, ts) + + # time doesn't exist + rng = date_range("1/1/2012", freq="23Min", periods=384) + ts = Series(np.random.randn(len(rng)), rng) + rs = ts.at_time("16:00") + assert len(rs) == 0 + + def test_at_time_raises(self): + # GH20725 + ser = Series("a b c".split()) + msg = "Index must be DatetimeIndex" + with pytest.raises(TypeError, match=msg): + ser.at_time("00:00") diff --git a/pandas/tests/series/methods/test_between.py b/pandas/tests/series/methods/test_between.py new file mode 100644 index 0000000000000..350a3fe6ff009 --- /dev/null +++ b/pandas/tests/series/methods/test_between.py @@ -0,0 +1,35 @@ +import numpy as np + +from pandas import Series, bdate_range, date_range, period_range +import pandas._testing as tm + + +class TestBetween: + + # TODO: redundant with test_between_datetime_values? + def test_between(self): + series = Series(date_range("1/1/2000", periods=10)) + left, right = series[[2, 7]] + + result = series.between(left, right) + expected = (series >= left) & (series <= right) + tm.assert_series_equal(result, expected) + + def test_between_datetime_values(self): + ser = Series(bdate_range("1/1/2000", periods=20).astype(object)) + ser[::2] = np.nan + + result = ser[ser.between(ser[3], ser[17])] + expected = ser[3:18].dropna() + tm.assert_series_equal(result, expected) + + result = ser[ser.between(ser[3], ser[17], inclusive=False)] + expected = ser[5:16].dropna() + tm.assert_series_equal(result, expected) + + def test_between_period_values(self): + ser = Series(period_range("2000-01-01", periods=10, freq="D")) + left, right = ser[[2, 7]] + result = ser.between(left, right) + expected = (ser >= left) & (ser <= right) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_between_time.py b/pandas/tests/series/methods/test_between_time.py new file mode 100644 index 0000000000000..3fa26afe77a1d --- /dev/null +++ b/pandas/tests/series/methods/test_between_time.py @@ -0,0 +1,144 @@ +from datetime import datetime, time +from itertools import product + +import numpy as np +import pytest + +from pandas._libs.tslibs import timezones +import pandas.util._test_decorators as td + +from pandas import DataFrame, Series, date_range +import pandas._testing as tm + + +class TestBetweenTime: + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_localized_between_time(self, tzstr): + tz = timezones.maybe_get_tz(tzstr) + + rng = date_range("4/16/2012", "5/1/2012", freq="H") + ts = Series(np.random.randn(len(rng)), index=rng) + + ts_local = ts.tz_localize(tzstr) + + t1, t2 = time(10, 0), time(11, 0) + result = ts_local.between_time(t1, t2) + expected = ts.between_time(t1, t2).tz_localize(tzstr) + tm.assert_series_equal(result, expected) + assert timezones.tz_compare(result.index.tz, tz) + + def test_between_time(self): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = 
Series(np.random.randn(len(rng)), index=rng) + stime = time(0, 0) + etime = time(1, 0) + + close_open = product([True, False], [True, False]) + for inc_start, inc_end in close_open: + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = 13 * 4 + 1 + if not inc_start: + exp_len -= 5 + if not inc_end: + exp_len -= 4 + + assert len(filtered) == exp_len + for rs in filtered.index: + t = rs.time() + if inc_start: + assert t >= stime + else: + assert t > stime + + if inc_end: + assert t <= etime + else: + assert t < etime + + result = ts.between_time("00:00", "01:00") + expected = ts.between_time(stime, etime) + tm.assert_series_equal(result, expected) + + # across midnight + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = Series(np.random.randn(len(rng)), index=rng) + stime = time(22, 0) + etime = time(9, 0) + + close_open = product([True, False], [True, False]) + for inc_start, inc_end in close_open: + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = (12 * 11 + 1) * 4 + 1 + if not inc_start: + exp_len -= 4 + if not inc_end: + exp_len -= 4 + + assert len(filtered) == exp_len + for rs in filtered.index: + t = rs.time() + if inc_start: + assert (t >= stime) or (t <= etime) + else: + assert (t > stime) or (t <= etime) + + if inc_end: + assert (t <= etime) or (t >= stime) + else: + assert (t < etime) or (t >= stime) + + def test_between_time_raises(self): + # GH20725 + ser = Series("a b c".split()) + msg = "Index must be DatetimeIndex" + with pytest.raises(TypeError, match=msg): + ser.between_time(start_time="00:00", end_time="12:00") + + def test_between_time_types(self): + # GH11818 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time" + with pytest.raises(ValueError, match=msg): + rng.indexer_between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + + frame = DataFrame({"A": 0}, index=rng) + with pytest.raises(ValueError, match=msg): + frame.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + + series = Series(0, index=rng) + with pytest.raises(ValueError, match=msg): + series.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + + @td.skip_if_has_locale + def test_between_time_formats(self): + # GH11818 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + + strings = [ + ("2:00", "2:30"), + ("0200", "0230"), + ("2:00am", "2:30am"), + ("0200am", "0230am"), + ("2:00:00", "2:30:00"), + ("020000", "023000"), + ("2:00:00am", "2:30:00am"), + ("020000am", "023000am"), + ] + expected_length = 28 + + for time_string in strings: + assert len(ts.between_time(*time_string)) == expected_length + + def test_between_time_axis(self): + # issue 8839 + rng = date_range("1/1/2000", periods=100, freq="10min") + ts = Series(np.random.randn(len(rng)), index=rng) + stime, etime = ("08:00:00", "09:00:00") + expected_length = 7 + + assert len(ts.between_time(stime, etime)) == expected_length + assert len(ts.between_time(stime, etime, axis=0)) == expected_length + msg = "No axis named 1 for object type " + with pytest.raises(ValueError, match=msg): + ts.between_time(stime, etime, axis=1) diff --git a/pandas/tests/series/methods/test_truncate.py b/pandas/tests/series/methods/test_truncate.py index d4e2890ed8bf0..c97369b349f56 100644 --- a/pandas/tests/series/methods/test_truncate.py +++ b/pandas/tests/series/methods/test_truncate.py @@ -1,7 +1,10 @@ +from datetime import datetime + import 
numpy as np import pytest import pandas as pd +from pandas import Series, date_range import pandas._testing as tm from pandas.tseries.offsets import BDay @@ -76,3 +79,33 @@ def test_truncate_nonsortedindex(self): with pytest.raises(ValueError, match=msg): ts.sort_values(ascending=False).truncate(before="2011-11", after="2011-12") + + def test_truncate_datetimeindex_tz(self): + # GH 9243 + idx = date_range("4/1/2005", "4/30/2005", freq="D", tz="US/Pacific") + s = Series(range(len(idx)), index=idx) + result = s.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4)) + expected = Series([1, 2, 3], index=idx[1:4]) + tm.assert_series_equal(result, expected) + + def test_truncate_periodindex(self): + # GH 17717 + idx1 = pd.PeriodIndex( + [pd.Period("2017-09-02"), pd.Period("2017-09-02"), pd.Period("2017-09-03")] + ) + series1 = pd.Series([1, 2, 3], index=idx1) + result1 = series1.truncate(after="2017-09-02") + + expected_idx1 = pd.PeriodIndex( + [pd.Period("2017-09-02"), pd.Period("2017-09-02")] + ) + tm.assert_series_equal(result1, pd.Series([1, 2], index=expected_idx1)) + + idx2 = pd.PeriodIndex( + [pd.Period("2017-09-03"), pd.Period("2017-09-02"), pd.Period("2017-09-03")] + ) + series2 = pd.Series([1, 2, 3], index=idx2) + result2 = series2.sort_index().truncate(after="2017-09-02") + + expected_idx2 = pd.PeriodIndex([pd.Period("2017-09-02")]) + tm.assert_series_equal(result2, pd.Series([2], index=expected_idx2)) diff --git a/pandas/tests/series/methods/test_tz_convert.py b/pandas/tests/series/methods/test_tz_convert.py new file mode 100644 index 0000000000000..ce348d5323e62 --- /dev/null +++ b/pandas/tests/series/methods/test_tz_convert.py @@ -0,0 +1,29 @@ +import numpy as np +import pytest + +from pandas import DatetimeIndex, Series, date_range +import pandas._testing as tm + + +class TestTZConvert: + def test_series_tz_convert(self): + rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern") + ts = Series(1, index=rng) + + result = ts.tz_convert("Europe/Berlin") + assert result.index.tz.zone == "Europe/Berlin" + + # can't convert tz-naive + rng = date_range("1/1/2011", periods=200, freq="D") + ts = Series(1, index=rng) + + with pytest.raises(TypeError, match="Cannot convert tz-naive"): + ts.tz_convert("US/Eastern") + + def test_series_tz_convert_to_utc(self): + base = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz="UTC") + idx1 = base.tz_convert("Asia/Tokyo")[:2] + idx2 = base.tz_convert("US/Eastern")[1:] + + res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2) + tm.assert_series_equal(res, Series([np.nan, 3, np.nan], index=base)) diff --git a/pandas/tests/series/methods/test_tz_localize.py b/pandas/tests/series/methods/test_tz_localize.py new file mode 100644 index 0000000000000..44c55edf77c0a --- /dev/null +++ b/pandas/tests/series/methods/test_tz_localize.py @@ -0,0 +1,88 @@ +import pytest +import pytz + +from pandas._libs.tslibs import timezones + +from pandas import DatetimeIndex, NaT, Series, Timestamp, date_range +import pandas._testing as tm + + +class TestTZLocalize: + def test_series_tz_localize(self): + + rng = date_range("1/1/2011", periods=100, freq="H") + ts = Series(1, index=rng) + + result = ts.tz_localize("utc") + assert result.index.tz.zone == "UTC" + + # Can't localize if already tz-aware + rng = date_range("1/1/2011", periods=100, freq="H", tz="utc") + ts = Series(1, index=rng) + + with pytest.raises(TypeError, match="Already tz-aware"): + ts.tz_localize("US/Eastern") + + def test_series_tz_localize_ambiguous_bool(self): + # make sure that 
we are correctly accepting bool values as ambiguous + + # GH#14402 + ts = Timestamp("2015-11-01 01:00:03") + expected0 = Timestamp("2015-11-01 01:00:03-0500", tz="US/Central") + expected1 = Timestamp("2015-11-01 01:00:03-0600", tz="US/Central") + + ser = Series([ts]) + expected0 = Series([expected0]) + expected1 = Series([expected1]) + + with pytest.raises(pytz.AmbiguousTimeError): + ser.dt.tz_localize("US/Central") + + result = ser.dt.tz_localize("US/Central", ambiguous=True) + tm.assert_series_equal(result, expected0) + + result = ser.dt.tz_localize("US/Central", ambiguous=[True]) + tm.assert_series_equal(result, expected0) + + result = ser.dt.tz_localize("US/Central", ambiguous=False) + tm.assert_series_equal(result, expected1) + + result = ser.dt.tz_localize("US/Central", ambiguous=[False]) + tm.assert_series_equal(result, expected1) + + @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"]) + @pytest.mark.parametrize( + "method, exp", + [ + ["shift_forward", "2015-03-29 03:00:00"], + ["NaT", NaT], + ["raise", None], + ["foo", "invalid"], + ], + ) + def test_series_tz_localize_nonexistent(self, tz, method, exp): + # GH 8917 + n = 60 + dti = date_range(start="2015-03-29 02:00:00", periods=n, freq="min") + s = Series(1, dti) + if method == "raise": + with pytest.raises(pytz.NonExistentTimeError): + s.tz_localize(tz, nonexistent=method) + elif exp == "invalid": + with pytest.raises(ValueError): + dti.tz_localize(tz, nonexistent=method) + else: + result = s.tz_localize(tz, nonexistent=method) + expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz)) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_series_tz_localize_empty(self, tzstr): + # GH#2248 + ser = Series(dtype=object) + + ser2 = ser.tz_localize("utc") + assert ser2.index.tz == pytz.utc + + ser2 = ser.tz_localize(tzstr) + timezones.tz_compare(ser2.index.tz, timezones.maybe_get_tz(tzstr)) diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index b8be4ea137e3d..59ae0cd63690c 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -19,7 +19,6 @@ PeriodIndex, Series, TimedeltaIndex, - bdate_range, date_range, period_range, timedelta_range, @@ -622,18 +621,6 @@ def test_dt_accessor_updates_on_inplace(self): result = s.dt.date assert result[0] == result[2] - def test_between(self): - s = Series(bdate_range("1/1/2000", periods=20).astype(object)) - s[::2] = np.nan - - result = s[s.between(s[3], s[17])] - expected = s[3:18].dropna() - tm.assert_series_equal(result, expected) - - result = s[s.between(s[3], s[17], inclusive=False)] - expected = s[5:16].dropna() - tm.assert_series_equal(result, expected) - def test_date_tz(self): # GH11757 rng = pd.DatetimeIndex( diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index 03fee389542e3..f41245c2872a7 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -52,12 +52,6 @@ def test_dropna(self): s = Series([pd.Period("2011-01", freq="M"), pd.Period("NaT", freq="M")]) tm.assert_series_equal(s.dropna(), Series([pd.Period("2011-01", freq="M")])) - def test_between(self): - left, right = self.series[[2, 7]] - result = self.series.between(left, right) - expected = (self.series >= left) & (self.series <= right) - tm.assert_series_equal(result, expected) - # --------------------------------------------------------------------- # 
NaT support @@ -110,28 +104,6 @@ def test_align_series(self, join_type): ts.align(ts[::2], join=join_type) - def test_truncate(self): - # GH 17717 - idx1 = pd.PeriodIndex( - [pd.Period("2017-09-02"), pd.Period("2017-09-02"), pd.Period("2017-09-03")] - ) - series1 = pd.Series([1, 2, 3], index=idx1) - result1 = series1.truncate(after="2017-09-02") - - expected_idx1 = pd.PeriodIndex( - [pd.Period("2017-09-02"), pd.Period("2017-09-02")] - ) - tm.assert_series_equal(result1, pd.Series([1, 2], index=expected_idx1)) - - idx2 = pd.PeriodIndex( - [pd.Period("2017-09-03"), pd.Period("2017-09-02"), pd.Period("2017-09-03")] - ) - series2 = pd.Series([1, 2, 3], index=idx2) - result2 = series2.sort_index().truncate(after="2017-09-02") - - expected_idx2 = pd.PeriodIndex([pd.Period("2017-09-02")]) - tm.assert_series_equal(result2, pd.Series([2], index=expected_idx2)) - @pytest.mark.parametrize( "input_vals", [ diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 459377fb18f29..8f06ea69f5d66 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -1,13 +1,11 @@ -from datetime import datetime, time, timedelta +from datetime import datetime, timedelta from io import StringIO -from itertools import product import numpy as np import pytest from pandas._libs.tslib import iNaT from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime -import pandas.util._test_decorators as td import pandas as pd from pandas import ( @@ -23,8 +21,6 @@ ) import pandas._testing as tm -from pandas.tseries.offsets import BDay, BMonthEnd - def _simple_ts(start, end, freq="D"): rng = date_range(start, end, freq=freq) @@ -38,44 +34,6 @@ def assert_range_equal(left, right): class TestTimeSeries: - def test_asfreq(self): - ts = Series( - [0.0, 1.0, 2.0], - index=[ - datetime(2009, 10, 30), - datetime(2009, 11, 30), - datetime(2009, 12, 31), - ], - ) - - daily_ts = ts.asfreq("B") - monthly_ts = daily_ts.asfreq("BM") - tm.assert_series_equal(monthly_ts, ts) - - daily_ts = ts.asfreq("B", method="pad") - monthly_ts = daily_ts.asfreq("BM") - tm.assert_series_equal(monthly_ts, ts) - - daily_ts = ts.asfreq(BDay()) - monthly_ts = daily_ts.asfreq(BMonthEnd()) - tm.assert_series_equal(monthly_ts, ts) - - result = ts[:0].asfreq("M") - assert len(result) == 0 - assert result is not ts - - daily_ts = ts.asfreq("D", fill_value=-1) - result = daily_ts.value_counts().sort_index() - expected = Series([60, 1, 1, 1], index=[-1.0, 2.0, 1.0, 0.0]).sort_index() - tm.assert_series_equal(result, expected) - - def test_asfreq_datetimeindex_empty_series(self): - # GH 14320 - index = pd.DatetimeIndex(["2016-09-29 11:00"]) - expected = Series(index=index, dtype=object).asfreq("H") - result = Series([3], index=index.copy()).asfreq("H") - tm.assert_index_equal(expected.index, result.index) - def test_autocorr(self, datetime_series): # Just run the function corr1 = datetime_series.autocorr() @@ -268,15 +226,6 @@ def test_series_repr_nat(self): ) assert result == expected - def test_asfreq_keep_index_name(self): - # GH #9854 - index_name = "bar" - index = pd.date_range("20130101", periods=20, name=index_name) - df = pd.DataFrame(list(range(20)), columns=["foo"], index=index) - - assert index_name == df.index.name - assert index_name == df.asfreq("10D").index.name - def test_promote_datetime_date(self): rng = date_range("1/1/2000", periods=20) ts = Series(np.random.randn(20), index=rng) @@ -300,26 +249,6 @@ def test_promote_datetime_date(self): expected = 
rng.get_indexer(ts_slice.index) tm.assert_numpy_array_equal(result, expected) - def test_asfreq_normalize(self): - rng = date_range("1/1/2000 09:30", periods=20) - norm = date_range("1/1/2000", periods=20) - vals = np.random.randn(20) - ts = Series(vals, index=rng) - - result = ts.asfreq("D", normalize=True) - norm = date_range("1/1/2000", periods=20) - expected = Series(vals, index=norm) - - tm.assert_series_equal(result, expected) - - vals = np.random.randn(20, 3) - ts = DataFrame(vals, index=rng) - - result = ts.asfreq("D", normalize=True) - expected = DataFrame(vals, index=norm) - - tm.assert_frame_equal(result, expected) - def test_first_subset(self): ts = _simple_ts("1/1/2000", "1/1/2010", freq="12h") result = ts.first("10d") @@ -380,180 +309,6 @@ def test_format_pre_1900_dates(self): ts = Series(1, index=rng) repr(ts) - def test_at_time(self): - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = Series(np.random.randn(len(rng)), index=rng) - rs = ts.at_time(rng[1]) - assert (rs.index.hour == rng[1].hour).all() - assert (rs.index.minute == rng[1].minute).all() - assert (rs.index.second == rng[1].second).all() - - result = ts.at_time("9:30") - expected = ts.at_time(time(9, 30)) - tm.assert_series_equal(result, expected) - - df = DataFrame(np.random.randn(len(rng), 3), index=rng) - - result = ts[time(9, 30)] - result_df = df.loc[time(9, 30)] - expected = ts[(rng.hour == 9) & (rng.minute == 30)] - exp_df = df[(rng.hour == 9) & (rng.minute == 30)] - - # FIXME: dont leave commented-out - # expected.index = date_range('1/1/2000', '1/4/2000') - - tm.assert_series_equal(result, expected) - tm.assert_frame_equal(result_df, exp_df) - - chunk = df.loc["1/4/2000":] - result = chunk.loc[time(9, 30)] - expected = result_df[-1:] - tm.assert_frame_equal(result, expected) - - # midnight, everything - rng = date_range("1/1/2000", "1/31/2000") - ts = Series(np.random.randn(len(rng)), index=rng) - - result = ts.at_time(time(0, 0)) - tm.assert_series_equal(result, ts) - - # time doesn't exist - rng = date_range("1/1/2012", freq="23Min", periods=384) - ts = Series(np.random.randn(len(rng)), rng) - rs = ts.at_time("16:00") - assert len(rs) == 0 - - def test_at_time_raises(self): - # GH20725 - ser = pd.Series("a b c".split()) - msg = "Index must be DatetimeIndex" - with pytest.raises(TypeError, match=msg): - ser.at_time("00:00") - - def test_between(self): - series = Series(date_range("1/1/2000", periods=10)) - left, right = series[[2, 7]] - - result = series.between(left, right) - expected = (series >= left) & (series <= right) - tm.assert_series_equal(result, expected) - - def test_between_time(self): - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = Series(np.random.randn(len(rng)), index=rng) - stime = time(0, 0) - etime = time(1, 0) - - close_open = product([True, False], [True, False]) - for inc_start, inc_end in close_open: - filtered = ts.between_time(stime, etime, inc_start, inc_end) - exp_len = 13 * 4 + 1 - if not inc_start: - exp_len -= 5 - if not inc_end: - exp_len -= 4 - - assert len(filtered) == exp_len - for rs in filtered.index: - t = rs.time() - if inc_start: - assert t >= stime - else: - assert t > stime - - if inc_end: - assert t <= etime - else: - assert t < etime - - result = ts.between_time("00:00", "01:00") - expected = ts.between_time(stime, etime) - tm.assert_series_equal(result, expected) - - # across midnight - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = Series(np.random.randn(len(rng)), index=rng) - stime = time(22, 0) - etime = time(9, 0) 
- - close_open = product([True, False], [True, False]) - for inc_start, inc_end in close_open: - filtered = ts.between_time(stime, etime, inc_start, inc_end) - exp_len = (12 * 11 + 1) * 4 + 1 - if not inc_start: - exp_len -= 4 - if not inc_end: - exp_len -= 4 - - assert len(filtered) == exp_len - for rs in filtered.index: - t = rs.time() - if inc_start: - assert (t >= stime) or (t <= etime) - else: - assert (t > stime) or (t <= etime) - - if inc_end: - assert (t <= etime) or (t >= stime) - else: - assert (t < etime) or (t >= stime) - - def test_between_time_raises(self): - # GH20725 - ser = pd.Series("a b c".split()) - msg = "Index must be DatetimeIndex" - with pytest.raises(TypeError, match=msg): - ser.between_time(start_time="00:00", end_time="12:00") - - def test_between_time_types(self): - # GH11818 - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time" - with pytest.raises(ValueError, match=msg): - rng.indexer_between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) - - frame = DataFrame({"A": 0}, index=rng) - with pytest.raises(ValueError, match=msg): - frame.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) - - series = Series(0, index=rng) - with pytest.raises(ValueError, match=msg): - series.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) - - @td.skip_if_has_locale - def test_between_time_formats(self): - # GH11818 - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) - - strings = [ - ("2:00", "2:30"), - ("0200", "0230"), - ("2:00am", "2:30am"), - ("0200am", "0230am"), - ("2:00:00", "2:30:00"), - ("020000", "023000"), - ("2:00:00am", "2:30:00am"), - ("020000am", "023000am"), - ] - expected_length = 28 - - for time_string in strings: - assert len(ts.between_time(*time_string)) == expected_length - - def test_between_time_axis(self): - # issue 8839 - rng = date_range("1/1/2000", periods=100, freq="10min") - ts = Series(np.random.randn(len(rng)), index=rng) - stime, etime = ("08:00:00", "09:00:00") - expected_length = 7 - - assert len(ts.between_time(stime, etime)) == expected_length - assert len(ts.between_time(stime, etime, axis=0)) == expected_length - msg = "No axis named 1 for object type " - with pytest.raises(ValueError, match=msg): - ts.between_time(stime, etime, axis=1) - def test_to_period(self): from pandas.core.indexes.period import period_range diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index a363f927d10a9..74363f4c73c39 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -10,118 +10,12 @@ from pandas._libs.tslibs import conversion, timezones -from pandas import DatetimeIndex, Index, NaT, Series, Timestamp +from pandas import DatetimeIndex, Index, Series, Timestamp import pandas._testing as tm from pandas.core.indexes.datetimes import date_range class TestSeriesTimezones: - # ----------------------------------------------------------------- - # Series.tz_localize - def test_series_tz_localize(self): - - rng = date_range("1/1/2011", periods=100, freq="H") - ts = Series(1, index=rng) - - result = ts.tz_localize("utc") - assert result.index.tz.zone == "UTC" - - # Can't localize if already tz-aware - rng = date_range("1/1/2011", periods=100, freq="H", tz="utc") - ts = Series(1, index=rng) - - with pytest.raises(TypeError, match="Already tz-aware"): - ts.tz_localize("US/Eastern") - - def 
test_series_tz_localize_ambiguous_bool(self): - # make sure that we are correctly accepting bool values as ambiguous - - # GH#14402 - ts = Timestamp("2015-11-01 01:00:03") - expected0 = Timestamp("2015-11-01 01:00:03-0500", tz="US/Central") - expected1 = Timestamp("2015-11-01 01:00:03-0600", tz="US/Central") - - ser = Series([ts]) - expected0 = Series([expected0]) - expected1 = Series([expected1]) - - with pytest.raises(pytz.AmbiguousTimeError): - ser.dt.tz_localize("US/Central") - - result = ser.dt.tz_localize("US/Central", ambiguous=True) - tm.assert_series_equal(result, expected0) - - result = ser.dt.tz_localize("US/Central", ambiguous=[True]) - tm.assert_series_equal(result, expected0) - - result = ser.dt.tz_localize("US/Central", ambiguous=False) - tm.assert_series_equal(result, expected1) - - result = ser.dt.tz_localize("US/Central", ambiguous=[False]) - tm.assert_series_equal(result, expected1) - - @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"]) - @pytest.mark.parametrize( - "method, exp", - [ - ["shift_forward", "2015-03-29 03:00:00"], - ["NaT", NaT], - ["raise", None], - ["foo", "invalid"], - ], - ) - def test_series_tz_localize_nonexistent(self, tz, method, exp): - # GH 8917 - n = 60 - dti = date_range(start="2015-03-29 02:00:00", periods=n, freq="min") - s = Series(1, dti) - if method == "raise": - with pytest.raises(pytz.NonExistentTimeError): - s.tz_localize(tz, nonexistent=method) - elif exp == "invalid": - with pytest.raises(ValueError): - dti.tz_localize(tz, nonexistent=method) - else: - result = s.tz_localize(tz, nonexistent=method) - expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz)) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) - def test_series_tz_localize_empty(self, tzstr): - # GH#2248 - ser = Series(dtype=object) - - ser2 = ser.tz_localize("utc") - assert ser2.index.tz == pytz.utc - - ser2 = ser.tz_localize(tzstr) - timezones.tz_compare(ser2.index.tz, timezones.maybe_get_tz(tzstr)) - - # ----------------------------------------------------------------- - # Series.tz_convert - - def test_series_tz_convert(self): - rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern") - ts = Series(1, index=rng) - - result = ts.tz_convert("Europe/Berlin") - assert result.index.tz.zone == "Europe/Berlin" - - # can't convert tz-naive - rng = date_range("1/1/2011", periods=200, freq="D") - ts = Series(1, index=rng) - - with pytest.raises(TypeError, match="Cannot convert tz-naive"): - ts.tz_convert("US/Eastern") - - def test_series_tz_convert_to_utc(self): - base = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz="UTC") - idx1 = base.tz_convert("Asia/Tokyo")[:2] - idx2 = base.tz_convert("US/Eastern")[1:] - - res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2) - tm.assert_series_equal(res, Series([np.nan, 3, np.nan], index=base)) - # ----------------------------------------------------------------- # Series.append @@ -225,15 +119,6 @@ def test_dateutil_tzoffset_support(self): # it works! #2443 repr(series.index[0]) - @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) - def test_tz_aware_asfreq(self, tz): - dr = date_range("2011-12-01", "2012-07-20", freq="D", tz=tz) - - ser = Series(np.random.randn(len(dr)), index=dr) - - # it works! 
- ser.asfreq("T") - @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) def test_string_index_alias_tz_aware(self, tz): rng = date_range("1/1/2000", periods=10, tz=tz) @@ -299,28 +184,6 @@ def test_series_align_aware(self): assert new1.index.tz == pytz.UTC assert new2.index.tz == pytz.UTC - @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) - def test_localized_at_time_between_time(self, tzstr): - from datetime import time - - tz = timezones.maybe_get_tz(tzstr) - - rng = date_range("4/16/2012", "5/1/2012", freq="H") - ts = Series(np.random.randn(len(rng)), index=rng) - - ts_local = ts.tz_localize(tzstr) - - result = ts_local.at_time(time(10, 0)) - expected = ts.at_time(time(10, 0)).tz_localize(tzstr) - tm.assert_series_equal(result, expected) - assert timezones.tz_compare(result.index.tz, tz) - - t1, t2 = time(10, 0), time(11, 0) - result = ts_local.between_time(t1, t2) - expected = ts.between_time(t1, t2).tz_localize(tzstr) - tm.assert_series_equal(result, expected) - assert timezones.tz_compare(result.index.tz, tz) - @pytest.mark.parametrize("tzstr", ["Europe/Berlin", "dateutil/Europe/Berlin"]) def test_getitem_pydatetime_tz(self, tzstr): tz = timezones.maybe_get_tz(tzstr) @@ -335,14 +198,6 @@ def test_getitem_pydatetime_tz(self, tzstr): time_datetime = conversion.localize_pydatetime(dt, tz) assert ts[time_pandas] == ts[time_datetime] - def test_series_truncate_datetimeindex_tz(self): - # GH 9243 - idx = date_range("4/1/2005", "4/30/2005", freq="D", tz="US/Pacific") - s = Series(range(len(idx)), index=idx) - result = s.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4)) - expected = Series([1, 2, 3], index=idx[1:4]) - tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("copy", [True, False]) @pytest.mark.parametrize( "method, tz", [["tz_localize", None], ["tz_convert", "Europe/Berlin"]] From 2eca9e800b657d5564c1fdf5aa316f4dc809f424 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Tue, 25 Feb 2020 17:44:05 -0600 Subject: [PATCH 139/388] CLN/REF: Split up / clean Categorical constructor tests (#32211) * Split / parametrize * Dedupe some code * Parametrize * Add back from_codes for empty * Add comment --- .../arrays/categorical/test_constructors.py | 73 +++++++++---------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index d5537359d6948..c6b4c4904735c 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -353,9 +353,9 @@ def test_constructor_from_index_series_period(self): result = Categorical(Series(idx)) tm.assert_index_equal(result.categories, idx) - def test_constructor_invariant(self): - # GH 14190 - vals = [ + @pytest.mark.parametrize( + "values", + [ np.array([1.0, 1.2, 1.8, np.nan]), np.array([1, 2, 3], dtype="int64"), ["a", "b", "c", np.nan], @@ -366,11 +366,13 @@ def test_constructor_invariant(self): Timestamp("2014-01-02", tz="US/Eastern"), NaT, ], - ] - for val in vals: - c = Categorical(val) - c2 = Categorical(c) - tm.assert_categorical_equal(c, c2) + ], + ) + def test_constructor_invariant(self, values): + # GH 14190 + c = Categorical(values) + c2 = Categorical(c) + tm.assert_categorical_equal(c, c2) @pytest.mark.parametrize("ordered", [True, False]) def test_constructor_with_dtype(self, ordered): @@ -470,9 +472,14 @@ def test_construction_with_null(self, klass, nulls_fixture): 
tm.assert_categorical_equal(result, expected) - def test_from_codes(self): + def test_from_codes_empty(self): + cat = ["a", "b", "c"] + result = Categorical.from_codes([], categories=cat) + expected = Categorical([], categories=cat) - # too few categories + tm.assert_categorical_equal(result, expected) + + def test_from_codes_too_few_categories(self): dtype = CategoricalDtype(categories=[1, 2]) msg = "codes need to be between " with pytest.raises(ValueError, match=msg): @@ -480,22 +487,23 @@ def test_from_codes(self): with pytest.raises(ValueError, match=msg): Categorical.from_codes([1, 2], dtype=dtype) - # no int codes + def test_from_codes_non_int_codes(self): + dtype = CategoricalDtype(categories=[1, 2]) msg = "codes need to be array-like integers" with pytest.raises(ValueError, match=msg): Categorical.from_codes(["a"], categories=dtype.categories) with pytest.raises(ValueError, match=msg): Categorical.from_codes(["a"], dtype=dtype) - # no unique categories + def test_from_codes_non_unique_categories(self): with pytest.raises(ValueError, match="Categorical categories must be unique"): Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"]) - # NaN categories included + def test_from_codes_nan_cat_included(self): with pytest.raises(ValueError, match="Categorial categories cannot be null"): Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan]) - # too negative + def test_from_codes_too_negative(self): dtype = CategoricalDtype(categories=["a", "b", "c"]) msg = r"codes need to be between -1 and len\(categories\)-1" with pytest.raises(ValueError, match=msg): @@ -503,6 +511,8 @@ def test_from_codes(self): with pytest.raises(ValueError, match=msg): Categorical.from_codes([-2, 1, 2], dtype=dtype) + def test_from_codes(self): + dtype = CategoricalDtype(categories=["a", "b", "c"]) exp = Categorical(["a", "b", "c"], ordered=False) res = Categorical.from_codes([0, 1, 2], categories=dtype.categories) tm.assert_categorical_equal(exp, res) @@ -510,21 +520,18 @@ def test_from_codes(self): res = Categorical.from_codes([0, 1, 2], dtype=dtype) tm.assert_categorical_equal(exp, res) - def test_from_codes_with_categorical_categories(self): + @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex]) + def test_from_codes_with_categorical_categories(self, klass): # GH17884 expected = Categorical(["a", "b"], categories=["a", "b", "c"]) - result = Categorical.from_codes([0, 1], categories=Categorical(["a", "b", "c"])) + result = Categorical.from_codes([0, 1], categories=klass(["a", "b", "c"])) tm.assert_categorical_equal(result, expected) - result = Categorical.from_codes( - [0, 1], categories=CategoricalIndex(["a", "b", "c"]) - ) - tm.assert_categorical_equal(result, expected) - - # non-unique Categorical still raises + @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex]) + def test_from_codes_with_non_unique_categorical_categories(self, klass): with pytest.raises(ValueError, match="Categorical categories must be unique"): - Categorical.from_codes([0, 1], Categorical(["a", "b", "a"])) + Categorical.from_codes([0, 1], klass(["a", "b", "a"])) def test_from_codes_with_nan_code(self): # GH21767 @@ -535,24 +542,16 @@ def test_from_codes_with_nan_code(self): with pytest.raises(ValueError, match="codes need to be array-like integers"): Categorical.from_codes(codes, dtype=dtype) - def test_from_codes_with_float(self): + @pytest.mark.parametrize("codes", [[1.0, 2.0, 0], [1.1, 2.0, 0]]) + def test_from_codes_with_float(self, codes): # GH21767 - codes = [1.0, 2.0, 0] # integer, but 
in float dtype + # float codes should raise even if values are equal to integers dtype = CategoricalDtype(categories=["a", "b", "c"]) - # empty codes should not raise for floats - Categorical.from_codes([], dtype.categories) - - with pytest.raises(ValueError, match="codes need to be array-like integers"): - Categorical.from_codes(codes, dtype.categories) - - with pytest.raises(ValueError, match="codes need to be array-like integers"): - Categorical.from_codes(codes, dtype=dtype) - - codes = [1.1, 2.0, 0] # non-integer - with pytest.raises(ValueError, match="codes need to be array-like integers"): + msg = "codes need to be array-like integers" + with pytest.raises(ValueError, match=msg): Categorical.from_codes(codes, dtype.categories) - with pytest.raises(ValueError, match="codes need to be array-like integers"): + with pytest.raises(ValueError, match=msg): Categorical.from_codes(codes, dtype=dtype) def test_from_codes_with_dtype_raises(self): From 786bfd90cbff6e2fe21a614e2c6d722a80c9bb0e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Feb 2020 17:37:30 -0800 Subject: [PATCH 140/388] TST: show_versions test with unmerged commits (#32139) --- pandas/tests/util/test_show_versions.py | 18 +++++++++++++++++ pandas/util/_print_versions.py | 27 +++++++++++-------------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/pandas/tests/util/test_show_versions.py b/pandas/tests/util/test_show_versions.py index 0d2c81c4ea6c7..e36ea662fac8b 100644 --- a/pandas/tests/util/test_show_versions.py +++ b/pandas/tests/util/test_show_versions.py @@ -1,8 +1,26 @@ import re +import pytest + import pandas as pd +@pytest.mark.filterwarnings( + # openpyxl + "ignore:defusedxml.lxml is no longer supported:DeprecationWarning" +) +@pytest.mark.filterwarnings( + # html5lib + "ignore:Using or importing the ABCs from:DeprecationWarning" +) +@pytest.mark.filterwarnings( + # fastparquet + "ignore:pandas.core.index is deprecated:FutureWarning" +) +@pytest.mark.filterwarnings( + # pandas_datareader + "ignore:pandas.util.testing is deprecated:FutureWarning" +) def test_show_versions(capsys): # gh-32041 pd.show_versions() diff --git a/pandas/util/_print_versions.py b/pandas/util/_print_versions.py index 99b2b9e9f5f6e..f9502cc22b0c6 100644 --- a/pandas/util/_print_versions.py +++ b/pandas/util/_print_versions.py @@ -4,13 +4,23 @@ import os import platform import struct -import subprocess import sys from typing import List, Optional, Tuple, Union from pandas.compat._optional import VERSIONS, _get_version, import_optional_dependency +def _get_commit_hash() -> Optional[str]: + """ + Use vendored versioneer code to get git hash, which handles + git worktree correctly. 
+ """ + from pandas._version import get_versions + + versions = get_versions() + return versions["full-revisionid"] + + def get_sys_info() -> List[Tuple[str, Optional[Union[str, int]]]]: """ Returns system information as a list @@ -18,20 +28,7 @@ def get_sys_info() -> List[Tuple[str, Optional[Union[str, int]]]]: blob: List[Tuple[str, Optional[Union[str, int]]]] = [] # get full commit hash - commit = None - if os.path.isdir(".git") and os.path.isdir("pandas"): - try: - pipe = subprocess.Popen( - 'git log --format="%H" -n 1'.split(" "), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - so, serr = pipe.communicate() - except (OSError, ValueError): - pass - else: - if pipe.returncode == 0: - commit = so.decode("utf-8").strip().strip('"') + commit = _get_commit_hash() blob.append(("commit", commit)) From c5e3e254024efa9db29f3eff7c3284385d8a200f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Feb 2020 18:00:49 -0800 Subject: [PATCH 141/388] implement test_select_dtypes (#32250) --- .../tests/frame/methods/test_select_dtypes.py | 329 ++++++++++++++++++ pandas/tests/frame/test_dtypes.py | 319 ----------------- 2 files changed, 329 insertions(+), 319 deletions(-) create mode 100644 pandas/tests/frame/methods/test_select_dtypes.py diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py new file mode 100644 index 0000000000000..fe7baebcf0cf7 --- /dev/null +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -0,0 +1,329 @@ +from collections import OrderedDict + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Timestamp +import pandas._testing as tm + + +class TestSelectDtypes: + def test_select_dtypes_include_using_list_like(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(include=[np.number]) + ei = df[["b", "c", "d", "k"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=[np.number], exclude=["timedelta"]) + ei = df[["b", "c", "d"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=[np.number, "category"], exclude=["timedelta"]) + ei = df[["b", "c", "d", "f"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=["datetime"]) + ei = df[["g"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=["datetime64"]) + ei = df[["g"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=["datetimetz"]) + ei = df[["h", "i"]] + tm.assert_frame_equal(ri, ei) + + with pytest.raises(NotImplementedError, match=r"^$"): + df.select_dtypes(include=["period"]) + + def test_select_dtypes_exclude_using_list_like(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + } + ) + re = df.select_dtypes(exclude=[np.number]) + ee = df[["a", "e"]] + tm.assert_frame_equal(re, ee) + + def test_select_dtypes_exclude_include_using_list_like(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + 
"d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + exclude = (np.datetime64,) + include = np.bool_, "integer" + r = df.select_dtypes(include=include, exclude=exclude) + e = df[["b", "c", "e"]] + tm.assert_frame_equal(r, e) + + exclude = ("datetime",) + include = "bool", "int64", "int32" + r = df.select_dtypes(include=include, exclude=exclude) + e = df[["b", "e"]] + tm.assert_frame_equal(r, e) + + def test_select_dtypes_include_using_scalars(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(include=np.number) + ei = df[["b", "c", "d", "k"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include="datetime") + ei = df[["g"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include="datetime64") + ei = df[["g"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include="category") + ei = df[["f"]] + tm.assert_frame_equal(ri, ei) + + with pytest.raises(NotImplementedError, match=r"^$"): + df.select_dtypes(include="period") + + def test_select_dtypes_exclude_using_scalars(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(exclude=np.number) + ei = df[["a", "e", "f", "g", "h", "i", "j"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(exclude="category") + ei = df[["a", "b", "c", "d", "e", "g", "h", "i", "j", "k"]] + tm.assert_frame_equal(ri, ei) + + with pytest.raises(NotImplementedError, match=r"^$"): + df.select_dtypes(exclude="period") + + def test_select_dtypes_include_exclude_using_scalars(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(include=np.number, exclude="floating") + ei = df[["b", "c", "k"]] + tm.assert_frame_equal(ri, ei) + + def test_select_dtypes_include_exclude_mixed_scalars_lists(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "i": pd.date_range("20130101", periods=3, tz="CET"), + "j": 
pd.period_range("2013-01", periods=3, freq="M"), + "k": pd.timedelta_range("1 day", periods=3), + } + ) + + ri = df.select_dtypes(include=np.number, exclude=["floating", "timedelta"]) + ei = df[["b", "c"]] + tm.assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=[np.number, "category"], exclude="floating") + ei = df[["b", "c", "f", "k"]] + tm.assert_frame_equal(ri, ei) + + def test_select_dtypes_duplicate_columns(self): + # GH20839 + odict = OrderedDict + df = DataFrame( + odict( + [ + ("a", list("abc")), + ("b", list(range(1, 4))), + ("c", np.arange(3, 6).astype("u1")), + ("d", np.arange(4.0, 7.0, dtype="float64")), + ("e", [True, False, True]), + ("f", pd.date_range("now", periods=3).values), + ] + ) + ) + df.columns = ["a", "a", "b", "b", "b", "c"] + + expected = DataFrame( + {"a": list(range(1, 4)), "b": np.arange(3, 6).astype("u1")} + ) + + result = df.select_dtypes(include=[np.number], exclude=["floating"]) + tm.assert_frame_equal(result, expected) + + def test_select_dtypes_not_an_attr_but_still_valid_dtype(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + df["g"] = df.f.diff() + assert not hasattr(np, "u8") + r = df.select_dtypes(include=["i8", "O"], exclude=["timedelta"]) + e = df[["a", "b"]] + tm.assert_frame_equal(r, e) + + r = df.select_dtypes(include=["i8", "O", "timedelta64[ns]"]) + e = df[["a", "b", "g"]] + tm.assert_frame_equal(r, e) + + def test_select_dtypes_empty(self): + df = DataFrame({"a": list("abc"), "b": list(range(1, 4))}) + msg = "at least one of include or exclude must be nonempty" + with pytest.raises(ValueError, match=msg): + df.select_dtypes() + + def test_select_dtypes_bad_datetime64(self): + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + with pytest.raises(ValueError, match=".+ is too specific"): + df.select_dtypes(include=["datetime64[D]"]) + + with pytest.raises(ValueError, match=".+ is too specific"): + df.select_dtypes(exclude=["datetime64[as]"]) + + def test_select_dtypes_datetime_with_tz(self): + + df2 = DataFrame( + dict( + A=Timestamp("20130102", tz="US/Eastern"), + B=Timestamp("20130603", tz="CET"), + ), + index=range(5), + ) + df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1) + result = df3.select_dtypes(include=["datetime64[ns]"]) + expected = df3.reindex(columns=[]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", [str, "str", np.string_, "S1", "unicode", np.unicode_, "U1"] + ) + @pytest.mark.parametrize("arg", ["include", "exclude"]) + def test_select_dtypes_str_raises(self, dtype, arg): + df = DataFrame( + { + "a": list("abc"), + "g": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + msg = "string dtypes are not allowed" + kwargs = {arg: [dtype]} + + with pytest.raises(TypeError, match=msg): + df.select_dtypes(**kwargs) + + def test_select_dtypes_bad_arg_raises(self): + df = DataFrame( + { + "a": list("abc"), + "g": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + 
"f": pd.date_range("now", periods=3).values, + } + ) + + msg = "data type.*not understood" + with pytest.raises(TypeError, match=msg): + df.select_dtypes(["blargy, blarg, blarg"]) + + def test_select_dtypes_typecodes(self): + # GH 11990 + df = tm.makeCustomDataframe(30, 3, data_gen_f=lambda x, y: np.random.random()) + expected = df + FLOAT_TYPES = list(np.typecodes["AllFloat"]) + tm.assert_frame_equal(df.select_dtypes(FLOAT_TYPES), expected) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 8b63f0614eebf..713d8f3ceeedb 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -111,325 +111,6 @@ def test_dtypes_are_correct_after_column_slice(self): pd.Series(odict([("a", np.float_), ("b", np.float_), ("c", np.float_)])), ) - def test_select_dtypes_include_using_list_like(self): - df = DataFrame( - { - "a": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.Categorical(list("abc")), - "g": pd.date_range("20130101", periods=3), - "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), - "i": pd.date_range("20130101", periods=3, tz="CET"), - "j": pd.period_range("2013-01", periods=3, freq="M"), - "k": pd.timedelta_range("1 day", periods=3), - } - ) - - ri = df.select_dtypes(include=[np.number]) - ei = df[["b", "c", "d", "k"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(include=[np.number], exclude=["timedelta"]) - ei = df[["b", "c", "d"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(include=[np.number, "category"], exclude=["timedelta"]) - ei = df[["b", "c", "d", "f"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(include=["datetime"]) - ei = df[["g"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(include=["datetime64"]) - ei = df[["g"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(include=["datetimetz"]) - ei = df[["h", "i"]] - tm.assert_frame_equal(ri, ei) - - with pytest.raises(NotImplementedError, match=r"^$"): - df.select_dtypes(include=["period"]) - - def test_select_dtypes_exclude_using_list_like(self): - df = DataFrame( - { - "a": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - } - ) - re = df.select_dtypes(exclude=[np.number]) - ee = df[["a", "e"]] - tm.assert_frame_equal(re, ee) - - def test_select_dtypes_exclude_include_using_list_like(self): - df = DataFrame( - { - "a": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.date_range("now", periods=3).values, - } - ) - exclude = (np.datetime64,) - include = np.bool_, "integer" - r = df.select_dtypes(include=include, exclude=exclude) - e = df[["b", "c", "e"]] - tm.assert_frame_equal(r, e) - - exclude = ("datetime",) - include = "bool", "int64", "int32" - r = df.select_dtypes(include=include, exclude=exclude) - e = df[["b", "e"]] - tm.assert_frame_equal(r, e) - - def test_select_dtypes_include_using_scalars(self): - df = DataFrame( - { - "a": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.Categorical(list("abc")), - "g": pd.date_range("20130101", periods=3), - "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), - "i": pd.date_range("20130101", periods=3, tz="CET"), - 
"j": pd.period_range("2013-01", periods=3, freq="M"), - "k": pd.timedelta_range("1 day", periods=3), - } - ) - - ri = df.select_dtypes(include=np.number) - ei = df[["b", "c", "d", "k"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(include="datetime") - ei = df[["g"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(include="datetime64") - ei = df[["g"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(include="category") - ei = df[["f"]] - tm.assert_frame_equal(ri, ei) - - with pytest.raises(NotImplementedError, match=r"^$"): - df.select_dtypes(include="period") - - def test_select_dtypes_exclude_using_scalars(self): - df = DataFrame( - { - "a": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.Categorical(list("abc")), - "g": pd.date_range("20130101", periods=3), - "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), - "i": pd.date_range("20130101", periods=3, tz="CET"), - "j": pd.period_range("2013-01", periods=3, freq="M"), - "k": pd.timedelta_range("1 day", periods=3), - } - ) - - ri = df.select_dtypes(exclude=np.number) - ei = df[["a", "e", "f", "g", "h", "i", "j"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(exclude="category") - ei = df[["a", "b", "c", "d", "e", "g", "h", "i", "j", "k"]] - tm.assert_frame_equal(ri, ei) - - with pytest.raises(NotImplementedError, match=r"^$"): - df.select_dtypes(exclude="period") - - def test_select_dtypes_include_exclude_using_scalars(self): - df = DataFrame( - { - "a": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.Categorical(list("abc")), - "g": pd.date_range("20130101", periods=3), - "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), - "i": pd.date_range("20130101", periods=3, tz="CET"), - "j": pd.period_range("2013-01", periods=3, freq="M"), - "k": pd.timedelta_range("1 day", periods=3), - } - ) - - ri = df.select_dtypes(include=np.number, exclude="floating") - ei = df[["b", "c", "k"]] - tm.assert_frame_equal(ri, ei) - - def test_select_dtypes_include_exclude_mixed_scalars_lists(self): - df = DataFrame( - { - "a": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.Categorical(list("abc")), - "g": pd.date_range("20130101", periods=3), - "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), - "i": pd.date_range("20130101", periods=3, tz="CET"), - "j": pd.period_range("2013-01", periods=3, freq="M"), - "k": pd.timedelta_range("1 day", periods=3), - } - ) - - ri = df.select_dtypes(include=np.number, exclude=["floating", "timedelta"]) - ei = df[["b", "c"]] - tm.assert_frame_equal(ri, ei) - - ri = df.select_dtypes(include=[np.number, "category"], exclude="floating") - ei = df[["b", "c", "f", "k"]] - tm.assert_frame_equal(ri, ei) - - def test_select_dtypes_duplicate_columns(self): - # GH20839 - odict = OrderedDict - df = DataFrame( - odict( - [ - ("a", list("abc")), - ("b", list(range(1, 4))), - ("c", np.arange(3, 6).astype("u1")), - ("d", np.arange(4.0, 7.0, dtype="float64")), - ("e", [True, False, True]), - ("f", pd.date_range("now", periods=3).values), - ] - ) - ) - df.columns = ["a", "a", "b", "b", "b", "c"] - - expected = DataFrame( - {"a": list(range(1, 4)), "b": np.arange(3, 6).astype("u1")} - ) - - result = df.select_dtypes(include=[np.number], 
exclude=["floating"]) - tm.assert_frame_equal(result, expected) - - def test_select_dtypes_not_an_attr_but_still_valid_dtype(self): - df = DataFrame( - { - "a": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.date_range("now", periods=3).values, - } - ) - df["g"] = df.f.diff() - assert not hasattr(np, "u8") - r = df.select_dtypes(include=["i8", "O"], exclude=["timedelta"]) - e = df[["a", "b"]] - tm.assert_frame_equal(r, e) - - r = df.select_dtypes(include=["i8", "O", "timedelta64[ns]"]) - e = df[["a", "b", "g"]] - tm.assert_frame_equal(r, e) - - def test_select_dtypes_empty(self): - df = DataFrame({"a": list("abc"), "b": list(range(1, 4))}) - msg = "at least one of include or exclude must be nonempty" - with pytest.raises(ValueError, match=msg): - df.select_dtypes() - - def test_select_dtypes_bad_datetime64(self): - df = DataFrame( - { - "a": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.date_range("now", periods=3).values, - } - ) - with pytest.raises(ValueError, match=".+ is too specific"): - df.select_dtypes(include=["datetime64[D]"]) - - with pytest.raises(ValueError, match=".+ is too specific"): - df.select_dtypes(exclude=["datetime64[as]"]) - - def test_select_dtypes_datetime_with_tz(self): - - df2 = DataFrame( - dict( - A=Timestamp("20130102", tz="US/Eastern"), - B=Timestamp("20130603", tz="CET"), - ), - index=range(5), - ) - df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1) - result = df3.select_dtypes(include=["datetime64[ns]"]) - expected = df3.reindex(columns=[]) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "dtype", [str, "str", np.string_, "S1", "unicode", np.unicode_, "U1"] - ) - @pytest.mark.parametrize("arg", ["include", "exclude"]) - def test_select_dtypes_str_raises(self, dtype, arg): - df = DataFrame( - { - "a": list("abc"), - "g": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.date_range("now", periods=3).values, - } - ) - msg = "string dtypes are not allowed" - kwargs = {arg: [dtype]} - - with pytest.raises(TypeError, match=msg): - df.select_dtypes(**kwargs) - - def test_select_dtypes_bad_arg_raises(self): - df = DataFrame( - { - "a": list("abc"), - "g": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.date_range("now", periods=3).values, - } - ) - - msg = "data type.*not understood" - with pytest.raises(TypeError, match=msg): - df.select_dtypes(["blargy, blarg, blarg"]) - - def test_select_dtypes_typecodes(self): - # GH 11990 - df = tm.makeCustomDataframe(30, 3, data_gen_f=lambda x, y: np.random.random()) - expected = df - FLOAT_TYPES = list(np.typecodes["AllFloat"]) - tm.assert_frame_equal(df.select_dtypes(FLOAT_TYPES), expected) - def test_dtypes_gh8722(self, float_string_frame): float_string_frame["bool"] = float_string_frame["A"] > 0 result = float_string_frame.dtypes From 5f9fb1df6163b4634e4421342affb6419b905b81 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Feb 2020 18:01:47 -0800 Subject: [PATCH 142/388] test_combine.py (#32253) --- pandas/tests/frame/methods/test_combine.py | 47 ++++++++++++++++++++++ pandas/tests/frame/test_combine_concat.py | 21 ---------- 
pandas/tests/frame/test_operators.py | 19 --------- 3 files changed, 47 insertions(+), 40 deletions(-) create mode 100644 pandas/tests/frame/methods/test_combine.py diff --git a/pandas/tests/frame/methods/test_combine.py b/pandas/tests/frame/methods/test_combine.py new file mode 100644 index 0000000000000..bc6a67e4e1f32 --- /dev/null +++ b/pandas/tests/frame/methods/test_combine.py @@ -0,0 +1,47 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +class TestCombine: + @pytest.mark.parametrize( + "data", + [ + pd.date_range("2000", periods=4), + pd.date_range("2000", periods=4, tz="US/Central"), + pd.period_range("2000", periods=4), + pd.timedelta_range(0, periods=4), + ], + ) + def test_combine_datetlike_udf(self, data): + # GH#23079 + df = pd.DataFrame({"A": data}) + other = df.copy() + df.iloc[1, 0] = None + + def combiner(a, b): + return b + + result = df.combine(other, combiner) + tm.assert_frame_equal(result, other) + + def test_combine_generic(self, float_frame): + df1 = float_frame + df2 = float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]] + + combined = df1.combine(df2, np.add) + combined2 = df2.combine(df1, np.add) + assert combined["D"].isna().all() + assert combined2["D"].isna().all() + + chunk = combined.loc[combined.index[:-5], ["A", "B", "C"]] + chunk2 = combined2.loc[combined2.index[:-5], ["A", "B", "C"]] + + exp = ( + float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]].reindex_like(chunk) + * 2 + ) + tm.assert_frame_equal(chunk, exp) + tm.assert_frame_equal(chunk2, exp) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 36a476d195fe5..321eb5fe94daf 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -21,27 +21,6 @@ def test_concat_multiple_frames_dtypes(self): ) tm.assert_series_equal(results, expected) - @pytest.mark.parametrize( - "data", - [ - pd.date_range("2000", periods=4), - pd.date_range("2000", periods=4, tz="US/Central"), - pd.period_range("2000", periods=4), - pd.timedelta_range(0, periods=4), - ], - ) - def test_combine_datetlike_udf(self, data): - # https://github.com/pandas-dev/pandas/issues/23079 - df = pd.DataFrame({"A": data}) - other = df.copy() - df.iloc[1, 0] = None - - def combiner(a, b): - return b - - result = df.combine(other, combiner) - tm.assert_frame_equal(result, other) - def test_concat_multiple_tzs(self): # GH 12467 # combining datetime tz-aware and naive DataFrames diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index df40c2e7e2a11..542d9835bb5d3 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -685,25 +685,6 @@ def test_boolean_comparison(self): with pytest.raises(ValueError, match=msg1d): result = df == tup - def test_combine_generic(self, float_frame): - df1 = float_frame - df2 = float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]] - - combined = df1.combine(df2, np.add) - combined2 = df2.combine(df1, np.add) - assert combined["D"].isna().all() - assert combined2["D"].isna().all() - - chunk = combined.loc[combined.index[:-5], ["A", "B", "C"]] - chunk2 = combined2.loc[combined2.index[:-5], ["A", "B", "C"]] - - exp = ( - float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]].reindex_like(chunk) - * 2 - ) - tm.assert_frame_equal(chunk, exp) - tm.assert_frame_equal(chunk2, exp) - def test_inplace_ops_alignment(self): # inplace ops / ops alignment From 
972ab61d2a8f35561b1b55e66ce585c0984bdf9f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Feb 2020 18:02:41 -0800 Subject: [PATCH 143/388] Collect Series.append tests (#32254) --- pandas/tests/series/methods/test_append.py | 86 +++++++++++++++++++- pandas/tests/series/test_timezones.py | 91 +--------------------- 2 files changed, 86 insertions(+), 91 deletions(-) diff --git a/pandas/tests/series/methods/test_append.py b/pandas/tests/series/methods/test_append.py index 4d64b5b397981..4742d6ae3544f 100644 --- a/pandas/tests/series/methods/test_append.py +++ b/pandas/tests/series/methods/test_append.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -from pandas import DataFrame, DatetimeIndex, Series, date_range +from pandas import DataFrame, DatetimeIndex, Index, Series, Timestamp, date_range import pandas._testing as tm @@ -166,3 +166,87 @@ def test_append_tz_dateutil(self): appended = rng.append(rng2) tm.assert_index_equal(appended, rng3) + + def test_series_append_aware(self): + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern") + rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") + ser1 = Series([1], index=rng1) + ser2 = Series([2], index=rng2) + ts_result = ser1.append(ser2) + + exp_index = DatetimeIndex( + ["2011-01-01 01:00", "2011-01-01 02:00"], tz="US/Eastern" + ) + exp = Series([1, 2], index=exp_index) + tm.assert_series_equal(ts_result, exp) + assert ts_result.index.tz == rng1.tz + + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="UTC") + rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="UTC") + ser1 = Series([1], index=rng1) + ser2 = Series([2], index=rng2) + ts_result = ser1.append(ser2) + + exp_index = DatetimeIndex(["2011-01-01 01:00", "2011-01-01 02:00"], tz="UTC") + exp = Series([1, 2], index=exp_index) + tm.assert_series_equal(ts_result, exp) + utc = rng1.tz + assert utc == ts_result.index.tz + + # GH#7795 + # different tz coerces to object dtype, not UTC + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern") + rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Central") + ser1 = Series([1], index=rng1) + ser2 = Series([2], index=rng2) + ts_result = ser1.append(ser2) + exp_index = Index( + [ + Timestamp("1/1/2011 01:00", tz="US/Eastern"), + Timestamp("1/1/2011 02:00", tz="US/Central"), + ] + ) + exp = Series([1, 2], index=exp_index) + tm.assert_series_equal(ts_result, exp) + + def test_series_append_aware_naive(self): + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H") + rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") + ser1 = Series(np.random.randn(len(rng1)), index=rng1) + ser2 = Series(np.random.randn(len(rng2)), index=rng2) + ts_result = ser1.append(ser2) + + expected = ser1.index.astype(object).append(ser2.index.astype(object)) + assert ts_result.index.equals(expected) + + # mixed + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H") + rng2 = range(100) + ser1 = Series(np.random.randn(len(rng1)), index=rng1) + ser2 = Series(np.random.randn(len(rng2)), index=rng2) + ts_result = ser1.append(ser2) + + expected = ser1.index.astype(object).append(ser2.index) + assert ts_result.index.equals(expected) + + def test_series_append_dst(self): + rng1 = date_range("1/1/2016 01:00", periods=3, freq="H", tz="US/Eastern") + rng2 = date_range("8/1/2016 01:00", periods=3, freq="H", tz="US/Eastern") + ser1 = Series([1, 2, 3], index=rng1) + ser2 = Series([10, 11, 12], index=rng2) + ts_result = ser1.append(ser2) + + exp_index = 
DatetimeIndex( + [ + "2016-01-01 01:00", + "2016-01-01 02:00", + "2016-01-01 03:00", + "2016-08-01 01:00", + "2016-08-01 02:00", + "2016-08-01 03:00", + ], + tz="US/Eastern", + ) + exp = Series([1, 2, 3, 10, 11, 12], index=exp_index) + tm.assert_series_equal(ts_result, exp) + assert ts_result.index.tz == rng1.tz diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index 74363f4c73c39..e729ff91293a8 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -10,101 +10,12 @@ from pandas._libs.tslibs import conversion, timezones -from pandas import DatetimeIndex, Index, Series, Timestamp +from pandas import Series, Timestamp import pandas._testing as tm from pandas.core.indexes.datetimes import date_range class TestSeriesTimezones: - # ----------------------------------------------------------------- - # Series.append - - def test_series_append_aware(self): - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern") - rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") - ser1 = Series([1], index=rng1) - ser2 = Series([2], index=rng2) - ts_result = ser1.append(ser2) - - exp_index = DatetimeIndex( - ["2011-01-01 01:00", "2011-01-01 02:00"], tz="US/Eastern" - ) - exp = Series([1, 2], index=exp_index) - tm.assert_series_equal(ts_result, exp) - assert ts_result.index.tz == rng1.tz - - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="UTC") - rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="UTC") - ser1 = Series([1], index=rng1) - ser2 = Series([2], index=rng2) - ts_result = ser1.append(ser2) - - exp_index = DatetimeIndex(["2011-01-01 01:00", "2011-01-01 02:00"], tz="UTC") - exp = Series([1, 2], index=exp_index) - tm.assert_series_equal(ts_result, exp) - utc = rng1.tz - assert utc == ts_result.index.tz - - # GH#7795 - # different tz coerces to object dtype, not UTC - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern") - rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Central") - ser1 = Series([1], index=rng1) - ser2 = Series([2], index=rng2) - ts_result = ser1.append(ser2) - exp_index = Index( - [ - Timestamp("1/1/2011 01:00", tz="US/Eastern"), - Timestamp("1/1/2011 02:00", tz="US/Central"), - ] - ) - exp = Series([1, 2], index=exp_index) - tm.assert_series_equal(ts_result, exp) - - def test_series_append_aware_naive(self): - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H") - rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") - ser1 = Series(np.random.randn(len(rng1)), index=rng1) - ser2 = Series(np.random.randn(len(rng2)), index=rng2) - ts_result = ser1.append(ser2) - - expected = ser1.index.astype(object).append(ser2.index.astype(object)) - assert ts_result.index.equals(expected) - - # mixed - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H") - rng2 = range(100) - ser1 = Series(np.random.randn(len(rng1)), index=rng1) - ser2 = Series(np.random.randn(len(rng2)), index=rng2) - ts_result = ser1.append(ser2) - - expected = ser1.index.astype(object).append(ser2.index) - assert ts_result.index.equals(expected) - - def test_series_append_dst(self): - rng1 = date_range("1/1/2016 01:00", periods=3, freq="H", tz="US/Eastern") - rng2 = date_range("8/1/2016 01:00", periods=3, freq="H", tz="US/Eastern") - ser1 = Series([1, 2, 3], index=rng1) - ser2 = Series([10, 11, 12], index=rng2) - ts_result = ser1.append(ser2) - - exp_index = DatetimeIndex( - [ - "2016-01-01 01:00", - "2016-01-01 02:00", - 
"2016-01-01 03:00", - "2016-08-01 01:00", - "2016-08-01 02:00", - "2016-08-01 03:00", - ], - tz="US/Eastern", - ) - exp = Series([1, 2, 3, 10, 11, 12], index=exp_index) - tm.assert_series_equal(ts_result, exp) - assert ts_result.index.tz == rng1.tz - - # ----------------------------------------------------------------- - def test_dateutil_tzoffset_support(self): values = [188.5, 328.25] tzinfo = tzoffset(None, 7200) From 7c7048c3f012d8bd5e09af5757ee6efe0ab84a22 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Feb 2020 18:03:29 -0800 Subject: [PATCH 144/388] REF: test_to_timestamp (#32248) --- .../tests/frame/methods/test_to_timestamp.py | 103 +++++++++++++++++ pandas/tests/frame/test_period.py | 104 +----------------- 2 files changed, 105 insertions(+), 102 deletions(-) create mode 100644 pandas/tests/frame/methods/test_to_timestamp.py diff --git a/pandas/tests/frame/methods/test_to_timestamp.py b/pandas/tests/frame/methods/test_to_timestamp.py new file mode 100644 index 0000000000000..ae7d2827e05a6 --- /dev/null +++ b/pandas/tests/frame/methods/test_to_timestamp.py @@ -0,0 +1,103 @@ +from datetime import timedelta + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + DatetimeIndex, + Timedelta, + date_range, + period_range, + to_datetime, +) +import pandas._testing as tm + + +class TestToTimestamp: + def test_frame_to_time_stamp(self): + K = 5 + index = period_range(freq="A", start="1/1/2001", end="12/1/2009") + df = DataFrame(np.random.randn(len(index), K), index=index) + df["mix"] = "a" + + exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC") + exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns") + result = df.to_timestamp("D", "end") + tm.assert_index_equal(result.index, exp_index) + tm.assert_numpy_array_equal(result.values, df.values) + + exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN") + result = df.to_timestamp("D", "start") + tm.assert_index_equal(result.index, exp_index) + + def _get_with_delta(delta, freq="A-DEC"): + return date_range( + to_datetime("1/1/2001") + delta, + to_datetime("12/31/2009") + delta, + freq=freq, + ) + + delta = timedelta(hours=23) + result = df.to_timestamp("H", "end") + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns") + tm.assert_index_equal(result.index, exp_index) + + delta = timedelta(hours=23, minutes=59) + result = df.to_timestamp("T", "end") + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns") + tm.assert_index_equal(result.index, exp_index) + + result = df.to_timestamp("S", "end") + delta = timedelta(hours=23, minutes=59, seconds=59) + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") + tm.assert_index_equal(result.index, exp_index) + + # columns + df = df.T + + exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC") + exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns") + result = df.to_timestamp("D", "end", axis=1) + tm.assert_index_equal(result.columns, exp_index) + tm.assert_numpy_array_equal(result.values, df.values) + + exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN") + result = df.to_timestamp("D", "start", axis=1) + tm.assert_index_equal(result.columns, exp_index) + + delta = timedelta(hours=23) + result = df.to_timestamp("H", "end", axis=1) + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns") + tm.assert_index_equal(result.columns, 
exp_index) + + delta = timedelta(hours=23, minutes=59) + result = df.to_timestamp("T", "end", axis=1) + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns") + tm.assert_index_equal(result.columns, exp_index) + + result = df.to_timestamp("S", "end", axis=1) + delta = timedelta(hours=23, minutes=59, seconds=59) + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") + tm.assert_index_equal(result.columns, exp_index) + + # invalid axis + with pytest.raises(ValueError, match="axis"): + df.to_timestamp(axis=2) + + result1 = df.to_timestamp("5t", axis=1) + result2 = df.to_timestamp("t", axis=1) + expected = date_range("2001-01-01", "2009-01-01", freq="AS") + assert isinstance(result1.columns, DatetimeIndex) + assert isinstance(result2.columns, DatetimeIndex) + tm.assert_numpy_array_equal(result1.columns.asi8, expected.asi8) + tm.assert_numpy_array_equal(result2.columns.asi8, expected.asi8) + # PeriodIndex.to_timestamp always use 'infer' + assert result1.columns.freqstr == "AS-JAN" + assert result2.columns.freqstr == "AS-JAN" diff --git a/pandas/tests/frame/test_period.py b/pandas/tests/frame/test_period.py index a6b2b334d3ec8..1ce13fd31ba88 100644 --- a/pandas/tests/frame/test_period.py +++ b/pandas/tests/frame/test_period.py @@ -1,19 +1,6 @@ -from datetime import timedelta - import numpy as np -import pytest - -import pandas as pd -from pandas import ( - DataFrame, - DatetimeIndex, - Index, - PeriodIndex, - Timedelta, - date_range, - period_range, - to_datetime, -) + +from pandas import DataFrame, Index, PeriodIndex, period_range import pandas._testing as tm @@ -49,93 +36,6 @@ def test_frame_setitem(self): assert isinstance(rs.index, PeriodIndex) tm.assert_index_equal(rs.index, rng) - def test_frame_to_time_stamp(self): - K = 5 - index = period_range(freq="A", start="1/1/2001", end="12/1/2009") - df = DataFrame(np.random.randn(len(index), K), index=index) - df["mix"] = "a" - - exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC") - exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns") - result = df.to_timestamp("D", "end") - tm.assert_index_equal(result.index, exp_index) - tm.assert_numpy_array_equal(result.values, df.values) - - exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN") - result = df.to_timestamp("D", "start") - tm.assert_index_equal(result.index, exp_index) - - def _get_with_delta(delta, freq="A-DEC"): - return date_range( - to_datetime("1/1/2001") + delta, - to_datetime("12/31/2009") + delta, - freq=freq, - ) - - delta = timedelta(hours=23) - result = df.to_timestamp("H", "end") - exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns") - tm.assert_index_equal(result.index, exp_index) - - delta = timedelta(hours=23, minutes=59) - result = df.to_timestamp("T", "end") - exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns") - tm.assert_index_equal(result.index, exp_index) - - result = df.to_timestamp("S", "end") - delta = timedelta(hours=23, minutes=59, seconds=59) - exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") - tm.assert_index_equal(result.index, exp_index) - - # columns - df = df.T - - exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC") - exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns") - result = df.to_timestamp("D", "end", axis=1) - tm.assert_index_equal(result.columns, exp_index) - 
tm.assert_numpy_array_equal(result.values, df.values) - - exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN") - result = df.to_timestamp("D", "start", axis=1) - tm.assert_index_equal(result.columns, exp_index) - - delta = timedelta(hours=23) - result = df.to_timestamp("H", "end", axis=1) - exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns") - tm.assert_index_equal(result.columns, exp_index) - - delta = timedelta(hours=23, minutes=59) - result = df.to_timestamp("T", "end", axis=1) - exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns") - tm.assert_index_equal(result.columns, exp_index) - - result = df.to_timestamp("S", "end", axis=1) - delta = timedelta(hours=23, minutes=59, seconds=59) - exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") - tm.assert_index_equal(result.columns, exp_index) - - # invalid axis - with pytest.raises(ValueError, match="axis"): - df.to_timestamp(axis=2) - - result1 = df.to_timestamp("5t", axis=1) - result2 = df.to_timestamp("t", axis=1) - expected = pd.date_range("2001-01-01", "2009-01-01", freq="AS") - assert isinstance(result1.columns, DatetimeIndex) - assert isinstance(result2.columns, DatetimeIndex) - tm.assert_numpy_array_equal(result1.columns.asi8, expected.asi8) - tm.assert_numpy_array_equal(result2.columns.asi8, expected.asi8) - # PeriodIndex.to_timestamp always use 'infer' - assert result1.columns.freqstr == "AS-JAN" - assert result2.columns.freqstr == "AS-JAN" - def test_frame_index_to_string(self): index = PeriodIndex(["2011-1", "2011-2", "2011-3"], freq="M") frame = DataFrame(np.random.randn(3, 4), index=index) From f3a9b7800a22d1999550dd7e90755c0f5afd675e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Feb 2020 18:06:05 -0800 Subject: [PATCH 145/388] CLN: generic tests (#32256) --- pandas/tests/generic/test_generic.py | 6 +- pandas/tests/generic/test_series.py | 7 - pandas/tests/internals/test_internals.py | 158 +++++++++-------------- 3 files changed, 66 insertions(+), 105 deletions(-) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 8e54de771a3e4..1b6cb8447c76d 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -187,8 +187,10 @@ def test_constructor_compound_dtypes(self): def f(dtype): return self._construct(shape=3, value=1, dtype=dtype) - msg = "compound dtypes are not implemented" - f"in the {self._typ.__name__} constructor" + msg = ( + "compound dtypes are not implemented " + f"in the {self._typ.__name__} constructor" + ) with pytest.raises(NotImplementedError, match=msg): f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")]) diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py index 5aafd83da78fd..f119eb422a276 100644 --- a/pandas/tests/generic/test_series.py +++ b/pandas/tests/generic/test_series.py @@ -24,13 +24,6 @@ class TestSeries(Generic): _typ = Series _comparator = lambda self, x, y: tm.assert_series_equal(x, y) - def setup_method(self): - self.ts = tm.makeTimeSeries() # Was at top level in test_series - self.ts.name = "ts" - - self.series = tm.makeStringSeries() - self.series.name = "series" - def test_rename_mi(self): s = Series( [11, 21, 31], diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 0c9ddbf5473b3..27b0500983afd 100644 --- a/pandas/tests/internals/test_internals.py +++ 
b/pandas/tests/internals/test_internals.py @@ -203,12 +203,6 @@ def create_mgr(descr, item_shape=None): class TestBlock: def setup_method(self, method): - # self.fblock = get_float_ex() # a,c,e - # self.cblock = get_complex_ex() # - # self.oblock = get_obj_ex() - # self.bool_block = get_bool_ex() - # self.int_block = get_int_ex() - self.fblock = create_block("float", [0, 2, 4]) self.cblock = create_block("complex", [7]) self.oblock = create_block("object", [1, 3]) @@ -254,22 +248,11 @@ def test_merge(self): tm.assert_numpy_array_equal(merged.values[[0, 2]], np.array(avals)) tm.assert_numpy_array_equal(merged.values[[1, 3]], np.array(bvals)) - # TODO: merge with mixed type? - def test_copy(self): cop = self.fblock.copy() assert cop is not self.fblock assert_block_equal(self.fblock, cop) - def test_reindex_index(self): - pass - - def test_reindex_cast(self): - pass - - def test_insert(self): - pass - def test_delete(self): newb = self.fblock.copy() newb.delete(0) @@ -300,39 +283,7 @@ def test_delete(self): newb.delete(3) -class TestDatetimeBlock: - def test_can_hold_element(self): - block = create_block("datetime", [0]) - - # We will check that block._can_hold_element iff arr.__setitem__ works - arr = pd.array(block.values.ravel()) - - # coerce None - assert block._can_hold_element(None) - arr[0] = None - assert arr[0] is pd.NaT - - # coerce different types of datetime objects - vals = [np.datetime64("2010-10-10"), datetime(2010, 10, 10)] - for val in vals: - assert block._can_hold_element(val) - arr[0] = val - - val = date(2010, 10, 10) - assert not block._can_hold_element(val) - - msg = ( - "'value' should be a 'Timestamp', 'NaT', " - "or array of those. Got 'date' instead." - ) - with pytest.raises(TypeError, match=msg): - arr[0] = val - - class TestBlockManager: - def test_constructor_corner(self): - pass - def test_attrs(self): mgr = create_mgr("a,b,c: f8-1; d,e,f: f8-2") assert mgr.nblocks == 2 @@ -441,18 +392,6 @@ def test_set_change_dtype(self, mgr): mgr2.set("quux", tm.randn(N)) assert mgr2.get("quux").dtype == np.float_ - def test_set_change_dtype_slice(self): # GH8850 - cols = MultiIndex.from_tuples([("1st", "a"), ("2nd", "b"), ("3rd", "c")]) - df = DataFrame([[1.0, 2, 3], [4.0, 5, 6]], columns=cols) - df["2nd"] = df["2nd"] * 2.0 - - blocks = df._to_dict_of_blocks() - assert sorted(blocks.keys()) == ["float64", "int64"] - tm.assert_frame_equal( - blocks["float64"], DataFrame([[1.0, 4.0], [4.0, 10.0]], columns=cols[:2]) - ) - tm.assert_frame_equal(blocks["int64"], DataFrame([[3], [6]], columns=cols[2:])) - def test_copy(self, mgr): cp = mgr.copy(deep=False) for blk, cp_blk in zip(mgr.blocks, cp.blocks): @@ -486,7 +425,7 @@ def test_sparse_mixed(self): assert len(mgr.blocks) == 3 assert isinstance(mgr, BlockManager) - # what to test here? + # TODO: what to test here? 
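    # (editor's note, not part of the diff) For readers skimming these tests:
    # judging from test_attrs above, the create_mgr helper turns each
    # "names: dtype" group in its spec string into one block, so for example
    #
    #     mgr = create_mgr("a,b,c: f8-1; d,e,f: f8-2")
    #     assert mgr.nblocks == 2      # two float64 blocks, six items total
    #
    # is the block layout the surviving assertions rely on.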
def test_as_array_float(self): mgr = create_mgr("c: f4; d: f2; e: f8") @@ -650,22 +589,6 @@ def test_interleave(self): mgr = create_mgr("a: M8[ns]; b: m8[ns]") assert mgr.as_array().dtype == "object" - def test_interleave_non_unique_cols(self): - df = DataFrame( - [[pd.Timestamp("20130101"), 3.5], [pd.Timestamp("20130102"), 4.5]], - columns=["x", "x"], - index=[1, 2], - ) - - df_unique = df.copy() - df_unique.columns = ["x", "y"] - assert df_unique.values.shape == df.values.shape - tm.assert_numpy_array_equal(df_unique.values[0], df.values[0]) - tm.assert_numpy_array_equal(df_unique.values[1], df.values[1]) - - def test_consolidate(self): - pass - def test_consolidate_ordering_issues(self, mgr): mgr.set("f", tm.randn(N)) mgr.set("d", tm.randn(N)) @@ -683,10 +606,6 @@ def test_consolidate_ordering_issues(self, mgr): cons.blocks[0].mgr_locs.as_array, np.arange(len(cons.items), dtype=np.int64) ) - def test_reindex_index(self): - # TODO: should this be pytest.skip? - pass - def test_reindex_items(self): # mgr is not consolidated, f8 & f8-2 blocks mgr = create_mgr("a: f8; b: i8; c: f8; d: i8; e: f8; f: bool; g: f8-2") @@ -767,13 +686,6 @@ def test_get_bool_data(self): def test_unicode_repr_doesnt_raise(self): repr(create_mgr("b,\u05d0: object")) - def test_missing_unicode_key(self): - df = DataFrame({"a": [1]}) - try: - df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError - except KeyError: - pass # this is the expected exception - def test_equals(self): # unique items bm1 = create_mgr("a,b,c: i8-1; d,e,f: i8-2") @@ -843,8 +755,6 @@ class TestIndexing: create_mgr("a,b: f8; c,d: i8; e,f: f8", item_shape=(N, N)), ] - # MANAGERS = [MANAGERS[6]] - @pytest.mark.parametrize("mgr", MANAGERS) def test_get_slice(self, mgr): def assert_slice_ok(mgr, axis, slobj): @@ -994,11 +904,6 @@ def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, fill_value): mgr, ax, pd.Index(["foo", "bar", "baz"]), [0, 1, 2], fill_value, ) - # test_get_slice(slice_like, axis) - # take(indexer, axis) - # reindex_axis(new_labels, axis) - # reindex_indexer(new_labels, indexer, axis) - class TestBlockPlacement: def test_slice_len(self): @@ -1151,6 +1056,33 @@ def any(self, axis=None): class TestCanHoldElement: + def test_datetime_block_can_hold_element(self): + block = create_block("datetime", [0]) + + # We will check that block._can_hold_element iff arr.__setitem__ works + arr = pd.array(block.values.ravel()) + + # coerce None + assert block._can_hold_element(None) + arr[0] = None + assert arr[0] is pd.NaT + + # coerce different types of datetime objects + vals = [np.datetime64("2010-10-10"), datetime(2010, 10, 10)] + for val in vals: + assert block._can_hold_element(val) + arr[0] = val + + val = date(2010, 10, 10) + assert not block._can_hold_element(val) + + msg = ( + "'value' should be a 'Timestamp', 'NaT', " + "or array of those. Got 'date' instead." 
+ ) + with pytest.raises(TypeError, match=msg): + arr[0] = val + @pytest.mark.parametrize( "value, dtype", [ @@ -1280,3 +1212,37 @@ def test_dataframe_not_equal(): df1 = pd.DataFrame({"a": [1, 2], "b": ["s", "d"]}) df2 = pd.DataFrame({"a": ["s", "d"], "b": [1, 2]}) assert df1.equals(df2) is False + + +def test_missing_unicode_key(): + df = DataFrame({"a": [1]}) + with pytest.raises(KeyError, match="\u05d0"): + df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError + + +def test_set_change_dtype_slice(): + # GH#8850 + cols = MultiIndex.from_tuples([("1st", "a"), ("2nd", "b"), ("3rd", "c")]) + df = DataFrame([[1.0, 2, 3], [4.0, 5, 6]], columns=cols) + df["2nd"] = df["2nd"] * 2.0 + + blocks = df._to_dict_of_blocks() + assert sorted(blocks.keys()) == ["float64", "int64"] + tm.assert_frame_equal( + blocks["float64"], DataFrame([[1.0, 4.0], [4.0, 10.0]], columns=cols[:2]) + ) + tm.assert_frame_equal(blocks["int64"], DataFrame([[3], [6]], columns=cols[2:])) + + +def test_interleave_non_unique_cols(): + df = DataFrame( + [[pd.Timestamp("20130101"), 3.5], [pd.Timestamp("20130102"), 4.5]], + columns=["x", "x"], + index=[1, 2], + ) + + df_unique = df.copy() + df_unique.columns = ["x", "y"] + assert df_unique.values.shape == df.values.shape + tm.assert_numpy_array_equal(df_unique.values[0], df.values[0]) + tm.assert_numpy_array_equal(df_unique.values[1], df.values[1]) From a152c30a41ca944189b3ad2edf0c98ea8c4fecc2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Feb 2020 18:07:33 -0800 Subject: [PATCH 146/388] PERF: lazify consolidate_check and dont consolidate in get_slice (#32224) --- pandas/core/internals/managers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index d4f9c15a9f73f..329bfdf543c62 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -141,7 +141,7 @@ def __init__( if do_integrity_check: self._verify_integrity() - self._consolidate_check() + self._known_consolidated = False self._rebuild_blknos_and_blklocs() @@ -726,7 +726,6 @@ def get_slice(self, slobj: slice, axis: int = 0): new_axes[axis] = new_axes[axis][slobj] bm = type(self)(new_blocks, new_axes, do_integrity_check=False) - bm._consolidate_inplace() return bm def __contains__(self, item) -> bool: From 89ed0958157bd599ea5770e62093a52cc04f59ae Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Feb 2020 18:10:39 -0800 Subject: [PATCH 147/388] CLN: simplify+annotate _shallow_copy (#32244) --- pandas/core/indexes/base.py | 14 ++++++-------- pandas/core/indexes/category.py | 7 +++++-- pandas/core/indexes/datetimelike.py | 15 +++++++-------- pandas/core/indexes/numeric.py | 4 ++-- pandas/core/indexes/period.py | 19 ++++++------------- pandas/core/indexes/range.py | 10 ++++++---- pandas/tests/indexes/period/test_period.py | 8 +------- 7 files changed, 33 insertions(+), 44 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index aa22527d8c2d7..67f2f05c8af1e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -8,7 +8,7 @@ from pandas._libs import algos as libalgos, index as libindex, lib import pandas._libs.join as libjoin -from pandas._libs.lib import is_datetime_array +from pandas._libs.lib import is_datetime_array, no_default from pandas._libs.tslibs import OutOfBoundsDatetime, Timestamp from pandas._libs.tslibs.period import IncompatibleFrequency from pandas._libs.tslibs.timezones import tz_compare @@ -485,7 +485,7 @@ def 
_get_attributes_dict(self): """ return {k: getattr(self, k, None) for k in self._attributes} - def _shallow_copy(self, values=None, **kwargs): + def _shallow_copy(self, values=None, name: Label = no_default): """ Create a new Index with the same class as the caller, don't copy the data, use the same object attributes with passed in attributes taking @@ -496,16 +496,14 @@ def _shallow_copy(self, values=None, **kwargs): Parameters ---------- values : the values to create the new Index, optional - kwargs : updates the default attributes for this Index + name : Label, defaults to self.name """ + name = self.name if name is no_default else name + if values is None: values = self.values - attributes = self._get_attributes_dict() - - attributes.update(kwargs) - - return self._simple_new(values, **attributes) + return self._simple_new(values, name=name) def _shallow_copy_with_infer(self, values, **kwargs): """ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index caa6a9a93141f..603ec486d943e 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -7,6 +7,8 @@ from pandas._libs import index as libindex from pandas._libs.hashtable import duplicated_int64 +from pandas._libs.lib import no_default +from pandas._typing import Label from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.common import ( @@ -264,13 +266,14 @@ def _simple_new(cls, values, name=None, dtype=None): # -------------------------------------------------------------------- @Appender(Index._shallow_copy.__doc__) - def _shallow_copy(self, values=None, **kwargs): + def _shallow_copy(self, values=None, name: Label = no_default): + name = self.name if name is no_default else name + if values is None: values = self.values cat = Categorical(values, dtype=self.dtype) - name = kwargs.get("name", self.name) return type(self)._simple_new(cat, name=name) def _is_dtype_compat(self, other) -> bool: diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 1b3b6934aa53a..1abd58007c15f 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -8,6 +8,7 @@ from pandas._libs import NaT, iNaT, join as libjoin, lib from pandas._libs.tslibs import timezones +from pandas._typing import Label from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, cache_readonly @@ -649,7 +650,9 @@ def _set_freq(self, freq): self._data._freq = freq - def _shallow_copy(self, values=None, **kwargs): + def _shallow_copy(self, values=None, name: Label = lib.no_default): + name = self.name if name is lib.no_default else name + if values is None: values = self._data @@ -657,18 +660,16 @@ def _shallow_copy(self, values=None, **kwargs): values = values._data if isinstance(values, np.ndarray): # TODO: We would rather not get here - if kwargs.get("freq") is not None: - raise ValueError(kwargs) values = type(self._data)(values, dtype=self.dtype) attributes = self._get_attributes_dict() - if "freq" not in kwargs and self.freq is not None: + if self.freq is not None: if isinstance(values, (DatetimeArray, TimedeltaArray)): if values.freq is None: del attributes["freq"] - attributes.update(kwargs) + attributes["name"] = name return type(self)._simple_new(values, **attributes) # -------------------------------------------------------------------- @@ -738,9 +739,7 @@ def intersection(self, other, sort=False): # this point, depending on the values. 
result._set_freq(None) - result = self._shallow_copy( - result._data, name=result.name, dtype=result.dtype, freq=None - ) + result = self._shallow_copy(result._data, name=result.name) if result.freq is None: result._set_freq("infer") return result diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 367870f0ee467..06a26cc90555e 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -3,7 +3,7 @@ import numpy as np from pandas._libs import index as libindex, lib -from pandas._typing import Dtype +from pandas._typing import Dtype, Label from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.cast import astype_nansafe @@ -103,7 +103,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): return self._maybe_cast_indexer(label) @Appender(Index._shallow_copy.__doc__) - def _shallow_copy(self, values=None, name=lib.no_default): + def _shallow_copy(self, values=None, name: Label = lib.no_default): name = name if name is not lib.no_default else self.name if values is not None and not self._can_hold_na and values.dtype.kind == "f": diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 0b85433b699a8..c7c11c60185b3 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -5,9 +5,11 @@ import numpy as np from pandas._libs import index as libindex +from pandas._libs.lib import no_default from pandas._libs.tslibs import frequencies as libfrequencies, resolution from pandas._libs.tslibs.parsing import parse_time_string from pandas._libs.tslibs.period import Period +from pandas._typing import Label from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.common import ( @@ -248,8 +250,10 @@ def _has_complex_internals(self): # used to avoid libreduction code paths, which raise or require conversion return True - def _shallow_copy(self, values=None, **kwargs): + def _shallow_copy(self, values=None, name: Label = no_default): # TODO: simplify, figure out type of values + name = name if name is not no_default else self.name + if values is None: values = self._data @@ -263,18 +267,7 @@ def _shallow_copy(self, values=None, **kwargs): # GH#30713 this should never be reached raise TypeError(type(values), getattr(values, "dtype", None)) - # We don't allow changing `freq` in _shallow_copy. 
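    # (editor's note, not part of the diff) The net effect of this refactor is
    # that every _shallow_copy override now takes only `values` plus an
    # optional `name`; a sketch of the one supported call pattern, mirroring
    # the updated test further below, is
    #
    #     pi = pd.period_range("2018-01-01", periods=3, freq="2D")
    #     pi._shallow_copy(pi.asi8)    # keeps freq/dtype; name is optional
    #
    # so overriding freq or dtype through this path is no longer possible.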
- validate_dtype_freq(self.dtype, kwargs.get("freq")) - attributes = self._get_attributes_dict() - - attributes.update(kwargs) - if not len(values) and "dtype" not in kwargs: - attributes["dtype"] = self.dtype - return self._simple_new(values, **attributes) - - def _shallow_copy_with_infer(self, values=None, **kwargs): - """ we always want to return a PeriodIndex """ - return self._shallow_copy(values=values, **kwargs) + return self._simple_new(values, name=name) def _maybe_convert_timedelta(self, other): """ diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index d6752da6bc58f..fa8551bc646a6 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -7,6 +7,8 @@ import numpy as np from pandas._libs import index as libindex +from pandas._libs.lib import no_default +from pandas._typing import Label import pandas.compat as compat from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, cache_readonly @@ -385,13 +387,13 @@ def tolist(self): return list(self._range) @Appender(Int64Index._shallow_copy.__doc__) - def _shallow_copy(self, values=None, **kwargs): + def _shallow_copy(self, values=None, name: Label = no_default): + name = self.name if name is no_default else name + if values is None: - name = kwargs.get("name", self.name) return self._simple_new(self._range, name=name) else: - kwargs.setdefault("name", self.name) - return self._int64index._shallow_copy(values, **kwargs) + return Int64Index._simple_new(values, name=name) @Appender(Int64Index.copy.__doc__) def copy(self, name=None, deep=False, dtype=None, **kwargs): diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 6479b14e9521e..40c7ffba46450 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -128,15 +128,9 @@ def test_shallow_copy_empty(self): def test_shallow_copy_i8(self): # GH-24391 pi = period_range("2018-01-01", periods=3, freq="2D") - result = pi._shallow_copy(pi.asi8, freq=pi.freq) + result = pi._shallow_copy(pi.asi8) tm.assert_index_equal(result, pi) - def test_shallow_copy_changing_freq_raises(self): - pi = period_range("2018-01-01", periods=3, freq="2D") - msg = "specified freq and dtype are different" - with pytest.raises(IncompatibleFrequency, match=msg): - pi._shallow_copy(pi, freq="H") - def test_view_asi8(self): idx = PeriodIndex([], freq="M") From 7c5d3d522a10e1e36f1fd79f3bdb9537e6186631 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Feb 2020 18:13:15 -0800 Subject: [PATCH 148/388] REF: include CategoricalIndex in index_cached parametrization (#32215) --- asv_bench/benchmarks/categoricals.py | 3 --- asv_bench/benchmarks/index_cached_properties.py | 3 +++ asv_bench/benchmarks/index_object.py | 8 -------- asv_bench/benchmarks/indexing.py | 5 +++++ asv_bench/benchmarks/period.py | 3 --- asv_bench/benchmarks/timedelta.py | 3 --- asv_bench/benchmarks/timeseries.py | 3 +++ 7 files changed, 11 insertions(+), 17 deletions(-) diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 6f43a6fd3fc9b..107b9b9edcd5d 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -258,9 +258,6 @@ def setup(self): def time_get_loc(self): self.index.get_loc(self.category) - def time_shape(self): - self.index.shape - def time_shallow_copy(self): self.index._shallow_copy() diff --git a/asv_bench/benchmarks/index_cached_properties.py 
b/asv_bench/benchmarks/index_cached_properties.py index 13b33855569c9..16fbc741775e4 100644 --- a/asv_bench/benchmarks/index_cached_properties.py +++ b/asv_bench/benchmarks/index_cached_properties.py @@ -7,6 +7,7 @@ class IndexCache: params = [ [ + "CategoricalIndex", "DatetimeIndex", "Float64Index", "IntervalIndex", @@ -42,6 +43,8 @@ def setup(self, index_type): self.idx = pd.Float64Index(range(N)) elif index_type == "UInt64Index": self.idx = pd.UInt64Index(range(N)) + elif index_type == "CategoricalIndex": + self.idx = pd.CategoricalIndex(range(N), range(N)) else: raise ValueError assert len(self.idx) == N diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index cf51a4d35f805..b242de6a17208 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -55,14 +55,6 @@ def time_datetime_difference_disjoint(self): self.datetime_left.difference(self.datetime_right) -class Datetime: - def setup(self): - self.dr = date_range("20000101", freq="D", periods=10000) - - def time_is_dates_only(self): - self.dr._is_dates_only - - class Range: def setup(self): self.idx_inc = RangeIndex(start=0, stop=10 ** 7, step=3) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 087fe3916845b..e98d2948e76ea 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -1,3 +1,8 @@ +""" +These benchmarks are for Series and DataFrame indexing methods. For the +lower-level methods directly on Index and subclasses, see index_object.py, +indexing_engine.py, and index_cached.py +""" import warnings import numpy as np diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index b52aa2e55af35..e15d4c66e4fc0 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -85,9 +85,6 @@ def setup(self): def time_get_loc(self): self.index.get_loc(self.period) - def time_shape(self): - self.index.shape - def time_shallow_copy(self): self.index._shallow_copy() diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py index 208c8f9d14a5e..cfe05c3e257b1 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -73,9 +73,6 @@ def setup(self): def time_get_loc(self): self.index.get_loc(self.timedelta) - def time_shape(self): - self.index.shape - def time_shallow_copy(self): self.index._shallow_copy() diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index e3ed33456ee44..6c9f8ee77e5ad 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -57,6 +57,9 @@ def time_to_date(self, index_type): def time_to_pydatetime(self, index_type): self.index.to_pydatetime() + def time_is_dates_only(self, index_type): + self.index._is_dates_only + class TzLocalize: From 2fc855935060752c8cc3b2a7ffb571e61959b89a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Feb 2020 18:14:23 -0800 Subject: [PATCH 149/388] CLN: simplify CategoricalIndex._simple_new (#32204) --- pandas/core/indexes/category.py | 40 ++++--------------- .../indexes/categorical/test_constructors.py | 9 ----- 2 files changed, 8 insertions(+), 41 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 603ec486d943e..67bed7bd77c7f 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -19,7 +19,6 @@ is_scalar, ) from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas.core.dtypes.generic import 
ABCCategorical, ABCSeries from pandas.core.dtypes.missing import isna from pandas.core import accessor @@ -195,7 +194,9 @@ def __new__( raise cls._scalar_data_error(data) data = [] - data = cls._create_categorical(data, dtype=dtype) + assert isinstance(dtype, CategoricalDtype), dtype + if not isinstance(data, Categorical) or data.dtype != dtype: + data = Categorical(data, dtype=dtype) data = data.copy() if copy else data @@ -225,37 +226,11 @@ def _create_from_codes(self, codes, dtype=None, name=None): return CategoricalIndex(cat, name=name) @classmethod - def _create_categorical(cls, data, dtype=None): - """ - *this is an internal non-public method* - - create the correct categorical from data and the properties - - Parameters - ---------- - data : data for new Categorical - dtype : CategoricalDtype, defaults to existing - - Returns - ------- - Categorical - """ - if isinstance(data, (cls, ABCSeries)) and is_categorical_dtype(data): - data = data.values - - if not isinstance(data, ABCCategorical): - return Categorical(data, dtype=dtype) - - if isinstance(dtype, CategoricalDtype) and dtype != data.dtype: - # we want to silently ignore dtype='category' - data = data._set_dtype(dtype) - return data - - @classmethod - def _simple_new(cls, values, name=None, dtype=None): + def _simple_new(cls, values: Categorical, name=None, dtype=None): + # GH#32204 dtype is included for compat with Index._simple_new + assert isinstance(values, Categorical), type(values) result = object.__new__(cls) - values = cls._create_categorical(values, dtype=dtype) result._data = values result.name = name @@ -298,7 +273,8 @@ def _is_dtype_compat(self, other) -> bool: values = other if not is_list_like(values): values = [values] - other = CategoricalIndex(self._create_categorical(other, dtype=self.dtype)) + cat = Categorical(other, dtype=self.dtype) + other = CategoricalIndex(cat) if not other.isin(values).all(): raise TypeError( "cannot append a non-category item to a CategoricalIndex" diff --git a/pandas/tests/indexes/categorical/test_constructors.py b/pandas/tests/indexes/categorical/test_constructors.py index 1df0874e2f947..ee3f85da22781 100644 --- a/pandas/tests/indexes/categorical/test_constructors.py +++ b/pandas/tests/indexes/categorical/test_constructors.py @@ -136,12 +136,3 @@ def test_construction_with_categorical_dtype(self): with pytest.raises(ValueError, match=msg): Index(data, ordered=ordered, dtype=dtype) - - def test_create_categorical(self): - # GH#17513 The public CI constructor doesn't hit this code path with - # instances of CategoricalIndex, but we still want to test the code - ci = CategoricalIndex(["a", "b", "c"]) - # First ci is self, second ci is data. 
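    # (editor's note, not part of the diff) With _create_categorical removed,
    # construction funnels through the plain Categorical constructor; a rough
    # public-API equivalent of what the deleted helper and this test exercised
    # would be
    #
    #     cat = pd.Categorical(["a", "b", "c"], dtype=ci.dtype)
    #     pd.CategoricalIndex(cat)
    #
    # matching the `Categorical(other, dtype=self.dtype)` path added above.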
- result = CategoricalIndex._create_categorical(ci, ci) - expected = Categorical(["a", "b", "c"]) - tm.assert_categorical_equal(result, expected) From 9bf3a2884baa1310cedbbeea50dcfc3563bd3074 Mon Sep 17 00:00:00 2001 From: Jiaxiang Date: Wed, 26 Feb 2020 10:18:59 +0800 Subject: [PATCH 150/388] ENH: Timestamp constructor now raises more explanatory error message (#31653) --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/_libs/tslibs/timestamps.pyx | 23 +++++++++++++++---- .../scalar/timestamp/test_constructors.py | 13 +++++++++++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 705c335acfb48..cbc7159f55b8f 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -114,6 +114,7 @@ Datetimelike - :meth:`DatetimeArray.searchsorted`, :meth:`TimedeltaArray.searchsorted`, :meth:`PeriodArray.searchsorted` not recognizing non-pandas scalars and incorrectly raising ``ValueError`` instead of ``TypeError`` (:issue:`30950`) - Bug in :class:`Timestamp` where constructing :class:`Timestamp` with dateutil timezone less than 128 nanoseconds before daylight saving time switch from winter to summer would result in nonexistent time (:issue:`31043`) - Bug in :meth:`Period.to_timestamp`, :meth:`Period.start_time` with microsecond frequency returning a timestamp one nanosecond earlier than the correct time (:issue:`31475`) +- :class:`Timestamp` raising confusing error message when year, month or day is missing (:issue:`31200`) Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index b8c462abe35f1..9f3b4a8a554b5 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -411,10 +411,25 @@ class Timestamp(_Timestamp): ) elif ts_input is _no_input: - # User passed keyword arguments. 
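            # (editor's note, not part of the patch) The user-visible effect of
            # this hunk, per GH 31200: a Timestamp built from keywords with
            # year, month or day missing now surfaces datetime's own error,
            # e.g. roughly
            #
            #     >>> pd.Timestamp(year=2020, month=1)
            #     TypeError: function missing required argument 'day' (pos 3)
            #
            # with the exact wording varying by Python version, as the regexes
            # in the new test below allow.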
- ts_input = datetime(year, month, day, hour or 0, - minute or 0, second or 0, - microsecond or 0) + # GH 31200 + # When year, month or day is not given, we call the datetime + # constructor to make sure we get the same error message + # since Timestamp inherits datetime + datetime_kwargs = { + "hour": hour or 0, + "minute": minute or 0, + "second": second or 0, + "microsecond": microsecond or 0 + } + if year is not None: + datetime_kwargs["year"] = year + if month is not None: + datetime_kwargs["month"] = month + if day is not None: + datetime_kwargs["day"] = day + + ts_input = datetime(**datetime_kwargs) + elif is_integer_object(freq): # User passed positional arguments: # Timestamp(year, month, day[, hour[, minute[, second[, diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index b4a7173da84d0..4c75d1ebcd377 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -548,3 +548,16 @@ def test_timestamp_constructor_identity(): expected = Timestamp("2017-01-01T12") result = Timestamp(expected) assert result is expected + + +@pytest.mark.parametrize("kwargs", [{}, {"year": 2020}, {"year": 2020, "month": 1}]) +def test_constructor_missing_keyword(kwargs): + # GH 31200 + + # The exact error message of datetime() depends on its version + msg1 = r"function missing required argument '(year|month|day)' \(pos [123]\)" + msg2 = r"Required argument '(year|month|day)' \(pos [123]\) not found" + msg = "|".join([msg1, msg2]) + + with pytest.raises(TypeError, match=msg): + Timestamp(**kwargs) From 8b200c106c6a4aebba955469d6f9b2e27db8f047 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Tue, 25 Feb 2020 20:20:43 -0600 Subject: [PATCH 151/388] ENH: Implement DataFrame.value_counts (#31247) --- doc/source/getting_started/basics.rst | 11 ++ doc/source/reference/frame.rst | 1 + doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/base.py | 1 + pandas/core/frame.py | 102 +++++++++++++++++- .../tests/frame/methods/test_value_counts.py | 102 ++++++++++++++++++ 6 files changed, 217 insertions(+), 1 deletion(-) create mode 100644 pandas/tests/frame/methods/test_value_counts.py diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst index 277080006cb3c..c6d9a48fcf8ed 100644 --- a/doc/source/getting_started/basics.rst +++ b/doc/source/getting_started/basics.rst @@ -689,6 +689,17 @@ of a 1D array of values. It can also be used as a function on regular arrays: s.value_counts() pd.value_counts(data) +.. versionadded:: 1.1.0 + +The :meth:`~DataFrame.value_counts` method can be used to count combinations across multiple columns. +By default all columns are used but a subset can be selected using the ``subset`` argument. + +.. ipython:: python + + data = {"a": [1, 2, 3, 4], "b": ["x", "x", "y", "y"]} + frame = pd.DataFrame(data) + frame.value_counts() + Similarly, you can get the most frequently occurring value(s) (the mode) of the values in a Series or DataFrame: .. 
ipython:: python diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index c7b1cc1c832be..b326bbb5a465e 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -170,6 +170,7 @@ Computations / descriptive stats DataFrame.std DataFrame.var DataFrame.nunique + DataFrame.value_counts Reindexing / selection / label manipulation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index cbc7159f55b8f..888b7d23aeb35 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -55,6 +55,7 @@ Other API changes - :meth:`Series.describe` will now show distribution percentiles for ``datetime`` dtypes, statistics ``first`` and ``last`` will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`) +- Added :meth:`DataFrame.value_counts` (:issue:`5377`) - :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`) - ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`) - diff --git a/pandas/core/base.py b/pandas/core/base.py index 56d3596f71813..85424e35fa0e0 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1196,6 +1196,7 @@ def value_counts( -------- Series.count: Number of non-NA elements in a Series. DataFrame.count: Number of non-NA elements in a DataFrame. + DataFrame.value_counts: Equivalent method on DataFrames. Examples -------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7efb4fbb878d6..3fc10444ee064 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -111,7 +111,7 @@ from pandas.core.indexes import base as ibase from pandas.core.indexes.api import Index, ensure_index, ensure_index_from_sequences from pandas.core.indexes.datetimes import DatetimeIndex -from pandas.core.indexes.multi import maybe_droplevels +from pandas.core.indexes.multi import MultiIndex, maybe_droplevels from pandas.core.indexes.period import PeriodIndex from pandas.core.indexing import check_bool_indexer, convert_to_index_sliceable from pandas.core.internals import BlockManager @@ -4569,6 +4569,10 @@ def drop_duplicates( ------- DataFrame DataFrame with duplicates removed or None if ``inplace=True``. + + See Also + -------- + DataFrame.value_counts: Count unique combinations of columns. """ if self.empty: return self.copy() @@ -4814,6 +4818,102 @@ def sort_index( else: return self._constructor(new_data).__finalize__(self) + def value_counts( + self, + subset: Optional[Sequence[Label]] = None, + normalize: bool = False, + sort: bool = True, + ascending: bool = False, + ): + """ + Return a Series containing counts of unique rows in the DataFrame. + + .. versionadded:: 1.1.0 + + Parameters + ---------- + subset : list-like, optional + Columns to use when counting unique combinations. + normalize : bool, default False + Return proportions rather than frequencies. + sort : bool, default True + Sort by frequencies. + ascending : bool, default False + Sort in ascending order. + + Returns + ------- + Series + + See Also + -------- + Series.value_counts: Equivalent method on Series. + + Notes + ----- + The returned Series will have a MultiIndex with one level per input + column. By default, rows that contain any NA values are omitted from + the result. 
By default, the resulting Series will be in descending + order so that the first element is the most frequently-occurring row. + + Examples + -------- + >>> df = pd.DataFrame({'num_legs': [2, 4, 4, 6], + ... 'num_wings': [2, 0, 0, 0]}, + ... index=['falcon', 'dog', 'cat', 'ant']) + >>> df + num_legs num_wings + falcon 2 2 + dog 4 0 + cat 4 0 + ant 6 0 + + >>> df.value_counts() + num_legs num_wings + 4 0 2 + 6 0 1 + 2 2 1 + dtype: int64 + + >>> df.value_counts(sort=False) + num_legs num_wings + 2 2 1 + 4 0 2 + 6 0 1 + dtype: int64 + + >>> df.value_counts(ascending=True) + num_legs num_wings + 2 2 1 + 6 0 1 + 4 0 2 + dtype: int64 + + >>> df.value_counts(normalize=True) + num_legs num_wings + 4 0 0.50 + 6 0 0.25 + 2 2 0.25 + dtype: float64 + """ + if subset is None: + subset = self.columns.tolist() + + counts = self.groupby(subset).size() + + if sort: + counts = counts.sort_values(ascending=ascending) + if normalize: + counts /= counts.sum() + + # Force MultiIndex for single column + if len(subset) == 1: + counts.index = MultiIndex.from_arrays( + [counts.index], names=[counts.index.name] + ) + + return counts + def nlargest(self, n, columns, keep="first") -> "DataFrame": """ Return the first `n` rows ordered by `columns` in descending order. diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py new file mode 100644 index 0000000000000..c409b0bbe6fa9 --- /dev/null +++ b/pandas/tests/frame/methods/test_value_counts.py @@ -0,0 +1,102 @@ +import numpy as np + +import pandas as pd +import pandas._testing as tm + + +def test_data_frame_value_counts_unsorted(): + df = pd.DataFrame( + {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + index=["falcon", "dog", "cat", "ant"], + ) + + result = df.value_counts(sort=False) + expected = pd.Series( + data=[1, 2, 1], + index=pd.MultiIndex.from_arrays( + [(2, 4, 6), (2, 0, 0)], names=["num_legs", "num_wings"] + ), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_ascending(): + df = pd.DataFrame( + {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + index=["falcon", "dog", "cat", "ant"], + ) + + result = df.value_counts(ascending=True) + expected = pd.Series( + data=[1, 1, 2], + index=pd.MultiIndex.from_arrays( + [(2, 6, 4), (2, 0, 0)], names=["num_legs", "num_wings"] + ), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_default(): + df = pd.DataFrame( + {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + index=["falcon", "dog", "cat", "ant"], + ) + + result = df.value_counts() + expected = pd.Series( + data=[2, 1, 1], + index=pd.MultiIndex.from_arrays( + [(4, 6, 2), (0, 0, 2)], names=["num_legs", "num_wings"] + ), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_normalize(): + df = pd.DataFrame( + {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + index=["falcon", "dog", "cat", "ant"], + ) + + result = df.value_counts(normalize=True) + expected = pd.Series( + data=[0.5, 0.25, 0.25], + index=pd.MultiIndex.from_arrays( + [(4, 6, 2), (0, 0, 2)], names=["num_legs", "num_wings"] + ), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_single_col_default(): + df = pd.DataFrame({"num_legs": [2, 4, 4, 6]}) + + result = df.value_counts() + expected = pd.Series( + data=[2, 1, 1], + index=pd.MultiIndex.from_arrays([[4, 6, 2]], names=["num_legs"]), + ) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_empty(): + 
df_no_cols = pd.DataFrame() + + result = df_no_cols.value_counts() + expected = pd.Series([], dtype=np.int64) + + tm.assert_series_equal(result, expected) + + +def test_data_frame_value_counts_empty_normalize(): + df_no_cols = pd.DataFrame() + + result = df_no_cols.value_counts(normalize=True) + expected = pd.Series([], dtype=np.float64) + + tm.assert_series_equal(result, expected) From 6c74f885515acfcfdc34546dbf1ba158065e9595 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Feb 2020 18:31:37 -0800 Subject: [PATCH 152/388] use ExtensionIndex._concat_same_dtype (#32232) --- pandas/core/indexes/datetimelike.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 1abd58007c15f..349b582de4358 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -552,14 +552,6 @@ def _summary(self, name=None) -> str: result = result.replace("'", "") return result - def _concat_same_dtype(self, to_concat, name): - """ - Concatenate to_concat which has the same class. - """ - new_data = type(self._data)._concat_same_type(to_concat) - - return self._simple_new(new_data, name=name) - def shift(self, periods=1, freq=None): """ Shift index by desired number of time frequency increments. From cf957ad01c4253cc233a34ccf7257f983d5d51a1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Feb 2020 19:59:05 -0800 Subject: [PATCH 153/388] REF: simplify IntervalIndex/IntervalArray _shallow_copy (#32247) --- pandas/core/arrays/interval.py | 39 ++++++--------------------------- pandas/core/indexes/interval.py | 18 +++++++++------ 2 files changed, 18 insertions(+), 39 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index b11736248c12a..f5167f470b056 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -725,45 +725,18 @@ def _concat_same_type(cls, to_concat): right = np.concatenate([interval.right for interval in to_concat]) return cls._simple_new(left, right, closed=closed, copy=False) - def _shallow_copy(self, left=None, right=None, closed=None): + def _shallow_copy(self, left, right): """ Return a new IntervalArray with the replacement attributes Parameters ---------- - left : array-like + left : Index Values to be used for the left-side of the intervals. - If None, the existing left and right values will be used. - - right : array-like + right : Index Values to be used for the right-side of the intervals. - If None and left is IntervalArray-like, the left and right - of the IntervalArray-like will be used. - - closed : {'left', 'right', 'both', 'neither'}, optional - Whether the intervals are closed on the left-side, right-side, both - or neither. If None, the existing closed will be used. 
""" - if left is None: - - # no values passed - left, right = self.left, self.right - - elif right is None: - - # only single value passed, could be an IntervalArray - # or array of Intervals - if not isinstance(left, (type(self), ABCIntervalIndex)): - left = type(self)(left) - - left, right = left.left, left.right - else: - - # both left and right are values - pass - - closed = closed or self.closed - return self._simple_new(left, right, closed=closed, verify_integrity=False) + return self._simple_new(left, right, closed=self.closed, verify_integrity=False) def copy(self): """ @@ -1035,7 +1008,9 @@ def set_closed(self, closed): msg = f"invalid option for 'closed': {closed}" raise ValueError(msg) - return self._shallow_copy(closed=closed) + return type(self)._simple_new( + left=self.left, right=self.right, closed=closed, verify_integrity=False + ) @property def length(self): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 6ea4250e4acf4..b3923a1298859 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -333,11 +333,12 @@ def from_tuples( # -------------------------------------------------------------------- @Appender(Index._shallow_copy.__doc__) - def _shallow_copy(self, left=None, right=None, **kwargs): - result = self._data._shallow_copy(left=left, right=right) + def _shallow_copy(self, values=None, **kwargs): + if values is None: + values = self._data attributes = self._get_attributes_dict() attributes.update(kwargs) - return self._simple_new(result, **attributes) + return self._simple_new(values, **attributes) @cache_readonly def _isnan(self): @@ -407,7 +408,7 @@ def astype(self, dtype, copy=True): with rewrite_exception("IntervalArray", type(self).__name__): new_values = self.values.astype(dtype, copy=copy) if is_interval_dtype(new_values): - return self._shallow_copy(new_values.left, new_values.right) + return self._shallow_copy(new_values) return Index.astype(self, dtype, copy=copy) @property @@ -881,7 +882,8 @@ def where(self, cond, other=None): if other is None: other = self._na_value values = np.where(cond, self.values, other) - return self._shallow_copy(values) + result = IntervalArray(values) + return self._shallow_copy(result) def delete(self, loc): """ @@ -893,7 +895,8 @@ def delete(self, loc): """ new_left = self.left.delete(loc) new_right = self.right.delete(loc) - return self._shallow_copy(new_left, new_right) + result = self._data._shallow_copy(new_left, new_right) + return self._shallow_copy(result) def insert(self, loc, item): """ @@ -927,7 +930,8 @@ def insert(self, loc, item): new_left = self.left.insert(loc, left_insert) new_right = self.right.insert(loc, right_insert) - return self._shallow_copy(new_left, new_right) + result = self._data._shallow_copy(new_left, new_right) + return self._shallow_copy(result) @Appender(_index_shared_docs["take"] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): From 2885b3020d73206d28298c87d1052361e83d1bb2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Feb 2020 20:43:19 -0800 Subject: [PATCH 154/388] CLN/TST: parametrize some tests in tests.indexing.test_float (#32187) * TST: cleanups * troubleshoot * troubleshoot travis --- pandas/tests/indexing/test_floats.py | 87 +++++++++++----------------- 1 file changed, 33 insertions(+), 54 deletions(-) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 4d3f1b0539aee..87520f5ab2577 100644 --- 
a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -162,10 +162,9 @@ def test_scalar_non_numeric(self, index_func, klass): s2.loc[3.0] = 10 assert s2.index.is_object() - for idxr in [lambda x: x]: - s2 = s.copy() - idxr(s2)[3.0] = 0 - assert s2.index.is_object() + s2 = s.copy() + s2[3.0] = 0 + assert s2.index.is_object() @pytest.mark.parametrize( "index_func", @@ -250,12 +249,7 @@ def test_scalar_integer(self, index_func, klass): # integer index i = index_func(5) - - if klass is Series: - # TODO: Should we be passing index=i here? - obj = Series(np.arange(len(i))) - else: - obj = DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i) + obj = gen_obj(klass, i) # coerce to equal int for idxr, getitem in [(lambda x: x.loc, False), (lambda x: x, True)]: @@ -313,7 +307,7 @@ def test_scalar_float(self, klass): result = idxr(s2)[indexer] self.check(result, s, 3, getitem) - # random integer is a KeyError + # random float is a KeyError with pytest.raises(KeyError, match=r"^3\.5$"): idxr(s)[3.5] @@ -429,15 +423,6 @@ def test_slice_integer(self): indexer = slice(3, 5) self.check(result, s, indexer, False) - # positional indexing - msg = ( - "cannot do slice indexing " - fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " - "type float" - ) - with pytest.raises(TypeError, match=msg): - s[l] - # getitem out-of-bounds for l in [slice(-6, 6), slice(-6.0, 6.0)]: @@ -485,23 +470,6 @@ def test_slice_integer(self): with pytest.raises(TypeError, match=msg): s[l] - # setitem - for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: - - sc = s.copy() - sc.loc[l] = 0 - result = sc.loc[l].values.ravel() - assert (result == 0).all() - - # positional indexing - msg = ( - "cannot do slice indexing " - fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " - "type float" - ) - with pytest.raises(TypeError, match=msg): - s[l] = 0 - @pytest.mark.parametrize("l", [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)]) def test_integer_positional_indexing(self, l): """ make sure that we are raising on positional indexing @@ -584,22 +552,34 @@ def test_slice_integer_frame_getitem(self, index_func): with pytest.raises(TypeError, match=msg): s[l] + @pytest.mark.parametrize("l", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) + @pytest.mark.parametrize( + "index_func", [tm.makeIntIndex, tm.makeRangeIndex], + ) + def test_float_slice_getitem_with_integer_index_raises(self, l, index_func): + + # similar to above, but on the getitem dim (of a DataFrame) + index = index_func(5) + + s = DataFrame(np.random.randn(5, 2), index=index) + # setitem - for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: + sc = s.copy() + sc.loc[l] = 0 + result = sc.loc[l].values.ravel() + assert (result == 0).all() - sc = s.copy() - sc.loc[l] = 0 - result = sc.loc[l].values.ravel() - assert (result == 0).all() + # positional indexing + msg = ( + "cannot do slice indexing " + fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " + "type float" + ) + with pytest.raises(TypeError, match=msg): + s[l] = 0 - # positional indexing - msg = ( - "cannot do slice indexing " - fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " - "type float" - ) - with pytest.raises(TypeError, match=msg): - s[l] = 0 + with pytest.raises(TypeError, match=msg): + s[l] @pytest.mark.parametrize("l", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) @pytest.mark.parametrize("klass", [Series, DataFrame]) @@ -614,10 +594,9 @@ def test_slice_float(self, l, klass): # getitem result = 
idxr(s)[l]
-        if isinstance(s, Series):
-            tm.assert_series_equal(result, expected)
-        else:
-            tm.assert_frame_equal(result, expected)
+        assert isinstance(result, type(s))
+        tm.assert_equal(result, expected)
+
         # setitem
         s2 = s.copy()
         idxr(s2)[l] = 0

From a713063624fda50aa170176cad8e60e95362b8e3 Mon Sep 17 00:00:00 2001
From: Sergei Chipiga
Date: Wed, 26 Feb 2020 07:50:35 +0200
Subject: [PATCH 155/388] Add link to pandas-tfrecords (#32143)

---
 doc/source/ecosystem.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst
index fb06ee122ae88..b7e53b84f0e02 100644
--- a/doc/source/ecosystem.rst
+++ b/doc/source/ecosystem.rst
@@ -56,6 +56,11 @@ joining paths, replacing file extensions, and checking if files exist are also a
 Statistics and machine learning
 -------------------------------

+`pandas-tfrecords `__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Easily save pandas DataFrames to the TensorFlow TFRecords format and read TFRecords back into pandas.
+
 `Statsmodels `__
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

From 1b8b4286193427bb87205753c9bec50e64122164 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Wed, 26 Feb 2020 04:27:24 -0800
Subject: [PATCH 156/388] CLN: remove dtype kwarg from _simple_new (#32260)

---
 pandas/core/indexes/base.py                      |  4 ++--
 pandas/core/indexes/category.py                  |  5 ++---
 pandas/core/indexes/interval.py                  | 10 ++++------
 pandas/core/indexes/period.py                    |  3 +--
 pandas/core/indexes/range.py                     |  8 ++++----
 pandas/tests/indexes/period/test_constructors.py | 12 ++++++------
 6 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 67f2f05c8af1e..c215fdb475ed8 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -451,7 +451,7 @@ def asi8(self):
         return None

     @classmethod
-    def _simple_new(cls, values, name=None, dtype=None):
+    def _simple_new(cls, values, name: Label = None):
         """
         We require that we have a dtype compat for the values. If we are passed
         a non-dtype compat, then coerce using the constructor.
@@ -3310,7 +3310,7 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): values = range(0) else: values = self._data[:0] # appropriately-dtyped empty array - target = self._simple_new(values, dtype=self.dtype, **attrs) + target = self._simple_new(values, **attrs) else: target = ensure_index(target) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 67bed7bd77c7f..5f0d6ea2d6278 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -226,8 +226,7 @@ def _create_from_codes(self, codes, dtype=None, name=None): return CategoricalIndex(cat, name=name) @classmethod - def _simple_new(cls, values: Categorical, name=None, dtype=None): - # GH#32204 dtype is included for compat with Index._simple_new + def _simple_new(cls, values: Categorical, name: Label = None): assert isinstance(values, Categorical), type(values) result = object.__new__(cls) @@ -433,7 +432,7 @@ def where(self, cond, other=None): other = self._na_value values = np.where(cond, self.values, other) cat = Categorical(values, dtype=self.dtype) - return self._shallow_copy(cat) + return type(self)._simple_new(cat, name=self.name) def reindex(self, target, method=None, level=None, limit=None, tolerance=None): """ diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index b3923a1298859..a7bb4237eab69 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -9,7 +9,7 @@ from pandas._libs import Timedelta, Timestamp, lib from pandas._libs.interval import Interval, IntervalMixin, IntervalTree -from pandas._typing import AnyArrayLike +from pandas._typing import AnyArrayLike, Label from pandas.util._decorators import Appender, Substitution, cache_readonly from pandas.util._exceptions import rewrite_exception @@ -191,7 +191,7 @@ def func(intvidx_self, other, sort=False): class IntervalIndex(IntervalMixin, ExtensionIndex): _typ = "intervalindex" _comparables = ["name"] - _attributes = ["name", "closed"] + _attributes = ["name"] # we would like our indexing holder to defer to us _defer_to_indexing = True @@ -227,17 +227,15 @@ def __new__( return cls._simple_new(array, name) @classmethod - def _simple_new(cls, array, name, closed=None): + def _simple_new(cls, array: IntervalArray, name: Label = None): """ Construct from an IntervalArray Parameters ---------- array : IntervalArray - name : str + name : Label, default None Attached as result.name - closed : Any - Ignored. """ assert isinstance(array, IntervalArray), type(array) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index c7c11c60185b3..35a5d99abf4e6 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -217,7 +217,7 @@ def __new__( return cls._simple_new(data, name=name) @classmethod - def _simple_new(cls, values, name=None, freq=None, **kwargs): + def _simple_new(cls, values: PeriodArray, name: Label = None): """ Create a new PeriodIndex. @@ -228,7 +228,6 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): or coercion. 
""" assert isinstance(values, PeriodArray), type(values) - assert freq is None or freq == values.freq, (freq, values.freq) result = object.__new__(cls) result._data = values diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index fa8551bc646a6..71cc62e6a110b 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -95,7 +95,7 @@ def __new__( # RangeIndex if isinstance(start, RangeIndex): start = start._range - return cls._simple_new(start, dtype=dtype, name=name) + return cls._simple_new(start, name=name) # validate the arguments if com.all_none(start, stop, step): @@ -113,7 +113,7 @@ def __new__( raise ValueError("Step must not be zero") rng = range(start, stop, step) - return cls._simple_new(rng, dtype=dtype, name=name) + return cls._simple_new(rng, name=name) @classmethod def from_range(cls, data: range, name=None, dtype=None) -> "RangeIndex": @@ -131,10 +131,10 @@ def from_range(cls, data: range, name=None, dtype=None) -> "RangeIndex": ) cls._validate_dtype(dtype) - return cls._simple_new(data, dtype=dtype, name=name) + return cls._simple_new(data, name=name) @classmethod - def _simple_new(cls, values: range, name=None, dtype=None) -> "RangeIndex": + def _simple_new(cls, values: range, name: Label = None) -> "RangeIndex": result = object.__new__(cls) assert isinstance(values, range) diff --git a/pandas/tests/indexes/period/test_constructors.py b/pandas/tests/indexes/period/test_constructors.py index 418f53591b913..b5ff83ec7514d 100644 --- a/pandas/tests/indexes/period/test_constructors.py +++ b/pandas/tests/indexes/period/test_constructors.py @@ -322,9 +322,9 @@ def test_constructor_simple_new(self): idx = period_range("2007-01", name="p", periods=2, freq="M") with pytest.raises(AssertionError, match=""): - idx._simple_new(idx, name="p", freq=idx.freq) + idx._simple_new(idx, name="p") - result = idx._simple_new(idx._data, name="p", freq=idx.freq) + result = idx._simple_new(idx._data, name="p") tm.assert_index_equal(result, idx) with pytest.raises(AssertionError): @@ -339,19 +339,19 @@ def test_constructor_simple_new_empty(self): # GH13079 idx = PeriodIndex([], freq="M", name="p") with pytest.raises(AssertionError, match=""): - idx._simple_new(idx, name="p", freq="M") + idx._simple_new(idx, name="p") - result = idx._simple_new(idx._data, name="p", freq="M") + result = idx._simple_new(idx._data, name="p") tm.assert_index_equal(result, idx) @pytest.mark.parametrize("floats", [[1.1, 2.1], np.array([1.1, 2.1])]) def test_constructor_floats(self, floats): with pytest.raises(AssertionError, match=" Date: Wed, 26 Feb 2020 15:35:02 +0300 Subject: [PATCH 157/388] ENH: add fold support to Timestamp constructor (#31563) --- doc/source/user_guide/timeseries.rst | 29 ++++++ doc/source/whatsnew/v1.1.0.rst | 22 +++++ pandas/_libs/tslib.pyx | 32 ++++--- pandas/_libs/tslibs/conversion.pxd | 1 + pandas/_libs/tslibs/conversion.pyx | 69 +++++++++++++- pandas/_libs/tslibs/timestamps.pxd | 2 +- pandas/_libs/tslibs/timestamps.pyx | 54 +++++++++-- pandas/_libs/tslibs/tzconversion.pxd | 1 + pandas/_libs/tslibs/tzconversion.pyx | 85 +++++++++++++++-- .../indexes/datetimes/test_constructors.py | 94 ++++++++++++++++++- .../tests/scalar/timestamp/test_timezones.py | 2 +- 11 files changed, 354 insertions(+), 37 deletions(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 3fdab0fd26643..f208c8d576131 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -2297,6 +2297,35 @@ To remove 
time zone information, use ``tz_localize(None)`` or ``tz_convert(None)
   # tz_convert(None) is identical to tz_convert('UTC').tz_localize(None)
   didx.tz_convert('UTC').tz_localize(None)

+.. _timeseries.fold:
+
+Fold
+~~~~
+
+.. versionadded:: 1.1.0
+
+For ambiguous times, pandas supports explicitly specifying the keyword-only fold argument.
+Due to daylight saving time, one wall clock time can occur twice when shifting
+from summer to winter time; fold describes whether the datetime-like corresponds
+to the first (0) or the second time (1) the wall clock hits the ambiguous time.
+Fold is supported only for constructing from naive ``datetime.datetime``
+(see `datetime documentation `__ for details), from :class:`Timestamp`,
+or from components (see below). Only ``dateutil`` timezones are supported
+(see `dateutil documentation `__
+for ``dateutil`` methods that deal with ambiguous datetimes) as ``pytz``
+timezones do not support fold (see `pytz documentation `__
+for details on how ``pytz`` deals with ambiguous datetimes). To localize an ambiguous datetime
+with ``pytz``, please use :meth:`Timestamp.tz_localize`. In general, we recommend relying
+on :meth:`Timestamp.tz_localize` when localizing ambiguous datetimes if you need direct
+control over how they are handled.
+
+.. ipython:: python
+
+   pd.Timestamp(datetime.datetime(2019, 10, 27, 1, 30, 0, 0),
+                tz='dateutil/Europe/London', fold=0)
+   pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30,
+                tz='dateutil/Europe/London', fold=1)
+
 .. _timeseries.timezone_ambiguous:

 Ambiguous times when localizing
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 888b7d23aeb35..2b64b85863def 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -36,6 +36,28 @@ For example:

     ser["2014"]
     ser.loc["May 2015"]

+.. _whatsnew_110.timestamp_fold_support:
+
+Fold argument support in Timestamp constructor
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:class:`Timestamp` now supports the keyword-only fold argument according to `PEP 495 `_, similar to the parent ``datetime.datetime`` class. It supports both accepting fold as an initialization argument and inferring fold from other constructor arguments (:issue:`25057`, :issue:`31338`). Support is limited to ``dateutil`` timezones as ``pytz`` doesn't support fold.
+
+For example:
+
+.. ipython:: python
+
+   ts = pd.Timestamp("2019-10-27 01:30:00+00:00")
+   ts.fold
+
+.. ipython:: python
+
+   ts = pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30,
+                     tz="dateutil/Europe/London", fold=1)
+   ts
+
+For more on working with fold, see :ref:`Fold subsection ` in the user guide.
+
 ..
_whatsnew_110.enhancements.other: Other enhancements diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 53e3354ca8eb6..a176c4e41e834 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -49,30 +49,31 @@ from pandas._libs.tslibs.tzconversion cimport ( cdef inline object create_datetime_from_ts( int64_t value, npy_datetimestruct dts, - object tz, object freq): + object tz, object freq, bint fold): """ convenience routine to construct a datetime.datetime from its parts """ return datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz) + dts.min, dts.sec, dts.us, tz, fold=fold) cdef inline object create_date_from_ts( int64_t value, npy_datetimestruct dts, - object tz, object freq): + object tz, object freq, bint fold): """ convenience routine to construct a datetime.date from its parts """ + # GH 25057 add fold argument to match other func_create signatures return date(dts.year, dts.month, dts.day) cdef inline object create_time_from_ts( int64_t value, npy_datetimestruct dts, - object tz, object freq): + object tz, object freq, bint fold): """ convenience routine to construct a datetime.time from its parts """ - return time(dts.hour, dts.min, dts.sec, dts.us, tz) + return time(dts.hour, dts.min, dts.sec, dts.us, tz, fold=fold) @cython.wraparound(False) @cython.boundscheck(False) def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, - str box="datetime"): + bint fold=0, str box="datetime"): """ Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp @@ -83,6 +84,13 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, convert to this timezone freq : str/Offset, default None freq to convert + fold : bint, default is 0 + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time + + .. versionadded:: 1.1.0 box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime' If datetime, convert to datetime.datetime If date, convert to datetime.date @@ -104,7 +112,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, str typ int64_t value, delta, local_value ndarray[object] result = np.empty(n, dtype=object) - object (*func_create)(int64_t, npy_datetimestruct, object, object) + object (*func_create)(int64_t, npy_datetimestruct, object, object, bint) if box == "date": assert (tz is None), "tz should be None when converting to date" @@ -129,7 +137,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, result[i] = NaT else: dt64_to_dtstruct(value, &dts) - result[i] = func_create(value, dts, tz, freq) + result[i] = func_create(value, dts, tz, freq, fold) elif is_tzlocal(tz): for i in range(n): value = arr[i] @@ -141,7 +149,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, # using the i8 representation. 
local_value = tz_convert_utc_to_tzlocal(value, tz) dt64_to_dtstruct(local_value, &dts) - result[i] = func_create(value, dts, tz, freq) + result[i] = func_create(value, dts, tz, freq, fold) else: trans, deltas, typ = get_dst_info(tz) @@ -155,7 +163,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, else: # Adjust datetime64 timestamp, recompute datetimestruct dt64_to_dtstruct(value + delta, &dts) - result[i] = func_create(value, dts, tz, freq) + result[i] = func_create(value, dts, tz, freq, fold) elif typ == 'dateutil': # no zone-name change for dateutil tzs - dst etc @@ -168,7 +176,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, # Adjust datetime64 timestamp, recompute datetimestruct pos = trans.searchsorted(value, side='right') - 1 dt64_to_dtstruct(value + deltas[pos], &dts) - result[i] = func_create(value, dts, tz, freq) + result[i] = func_create(value, dts, tz, freq, fold) else: # pytz for i in range(n): @@ -182,7 +190,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, new_tz = tz._tzinfos[tz._transition_info[pos]] dt64_to_dtstruct(value + deltas[pos], &dts) - result[i] = func_create(value, dts, new_tz, freq) + result[i] = func_create(value, dts, new_tz, freq, fold) return result diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index c74307a3d2887..bb20296e24587 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -12,6 +12,7 @@ cdef class _TSObject: npy_datetimestruct dts # npy_datetimestruct int64_t value # numpy dt64 object tzinfo + bint fold cdef convert_to_tsobject(object ts, object tz, object unit, diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 6e978d495c325..57483783faf9f 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -39,7 +39,8 @@ from pandas._libs.tslibs.nattype cimport ( from pandas._libs.tslibs.tzconversion import ( tz_localize_to_utc, tz_convert_single) -from pandas._libs.tslibs.tzconversion cimport _tz_convert_tzlocal_utc +from pandas._libs.tslibs.tzconversion cimport ( + _tz_convert_tzlocal_utc, _tz_convert_tzlocal_fromutc) # ---------------------------------------------------------------------- # Constants @@ -215,6 +216,11 @@ cdef class _TSObject: # npy_datetimestruct dts # npy_datetimestruct # int64_t value # numpy dt64 # object tzinfo + # bint fold + + def __cinit__(self): + # GH 25057. 
As per PEP 495, set fold to 0 by default + self.fold = 0 @property def value(self): @@ -322,6 +328,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, cdef: _TSObject obj = _TSObject() + obj.fold = ts.fold if tz is not None: tz = maybe_get_tz(tz) @@ -380,6 +387,8 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, _TSObject obj = _TSObject() int64_t value # numpy dt64 datetime dt + ndarray[int64_t] trans + int64_t[:] deltas value = dtstruct_to_dt64(&dts) obj.dts = dts @@ -389,10 +398,23 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, check_overflows(obj) return obj + # Infer fold from offset-adjusted obj.value + # see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute + if is_utc(tz): + pass + elif is_tzlocal(tz): + _tz_convert_tzlocal_fromutc(obj.value, tz, &obj.fold) + else: + trans, deltas, typ = get_dst_info(tz) + + if typ == 'dateutil': + pos = trans.searchsorted(obj.value, side='right') - 1 + obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) + # Keep the converter same as PyDateTime's dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, obj.dts.hour, obj.dts.min, obj.dts.sec, - obj.dts.us, obj.tzinfo) + obj.dts.us, obj.tzinfo, fold=obj.fold) obj = convert_datetime_to_tsobject( dt, tz, nanos=obj.dts.ps // 1000) return obj @@ -543,7 +565,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): elif obj.value == NPY_NAT: pass elif is_tzlocal(tz): - local_val = _tz_convert_tzlocal_utc(obj.value, tz, to_utc=False) + local_val = _tz_convert_tzlocal_fromutc(obj.value, tz, &obj.fold) dt64_to_dtstruct(local_val, &obj.dts) else: # Adjust datetime64 timestamp, recompute datetimestruct @@ -562,6 +584,8 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): # i.e. treat_tz_as_dateutil(tz) pos = trans.searchsorted(obj.value, side='right') - 1 dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) + # dateutil supports fold, so we infer fold from value + obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) else: # Note: as of 2018-07-17 all tzinfo objects that are _not_ # either pytz or dateutil have is_fixed_offset(tz) == True, @@ -571,6 +595,45 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): obj.tzinfo = tz +cdef inline bint _infer_tsobject_fold(_TSObject obj, ndarray[int64_t] trans, + int64_t[:] deltas, int32_t pos): + """ + Infer _TSObject fold property from value by assuming 0 and then setting + to 1 if necessary. + + Parameters + ---------- + obj : _TSObject + trans : ndarray[int64_t] + ndarray of offset transition points in nanoseconds since epoch. + deltas : int64_t[:] + array of offsets corresponding to transition points in trans. + pos : int32_t + Position of the last transition point before taking fold into account. + + Returns + ------- + bint + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time + + References + ---------- + .. [1] "PEP 495 - Local Time Disambiguation" + https://www.python.org/dev/peps/pep-0495/#the-fold-attribute + """ + cdef: + bint fold = 0 + + if pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value - fold_delta < trans[pos]: + fold = 1 + + return fold + cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz): """ Take a datetime/Timestamp in UTC and localizes to timezone tz. 
diff --git a/pandas/_libs/tslibs/timestamps.pxd b/pandas/_libs/tslibs/timestamps.pxd index b7282e02ff117..5e55e6e8d5297 100644 --- a/pandas/_libs/tslibs/timestamps.pxd +++ b/pandas/_libs/tslibs/timestamps.pxd @@ -5,4 +5,4 @@ from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct cdef object create_timestamp_from_ts(int64_t value, npy_datetimestruct dts, - object tz, object freq) + object tz, object freq, bint fold) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 9f3b4a8a554b5..5cd3467eed042 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -6,12 +6,12 @@ from numpy cimport int64_t cnp.import_array() from datetime import time as datetime_time, timedelta -from cpython.datetime cimport (datetime, +from cpython.datetime cimport (datetime, PyDateTime_Check, PyTZInfo_Check, PyDateTime_IMPORT) PyDateTime_IMPORT from pandas._libs.tslibs.util cimport ( - is_integer_object, is_offset_object) + is_datetime64_object, is_float_object, is_integer_object, is_offset_object) from pandas._libs.tslibs.c_timestamp cimport _Timestamp cimport pandas._libs.tslibs.ccalendar as ccalendar @@ -41,12 +41,12 @@ _no_input = object() cdef inline object create_timestamp_from_ts(int64_t value, npy_datetimestruct dts, - object tz, object freq): + object tz, object freq, bint fold): """ convenience routine to construct a Timestamp from its parts """ cdef _Timestamp ts_base ts_base = _Timestamp.__new__(Timestamp, dts.year, dts.month, dts.day, dts.hour, dts.min, - dts.sec, dts.us, tz) + dts.sec, dts.us, tz, fold=fold) ts_base.value = value ts_base.freq = freq ts_base.nanosecond = dts.ps // 1000 @@ -195,6 +195,13 @@ class Timestamp(_Timestamp): nanosecond : int, optional, default 0 .. versionadded:: 0.23.0 tzinfo : datetime.tzinfo, optional, default None + fold : {0, 1}, default None, keyword-only + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time + + .. versionadded:: 1.1.0 Notes ----- @@ -350,7 +357,9 @@ class Timestamp(_Timestamp): second=None, microsecond=None, nanosecond=None, - tzinfo=None + tzinfo=None, + *, + fold=None ): # The parameter list folds together legacy parameter names (the first # four) and positional and keyword parameter names from pydatetime. @@ -390,6 +399,32 @@ class Timestamp(_Timestamp): # User passed tzinfo instead of tz; avoid silently ignoring tz, tzinfo = tzinfo, None + # Allow fold only for unambiguous input + if fold is not None: + if fold not in [0, 1]: + raise ValueError( + "Valid values for the fold argument are None, 0, or 1." + ) + + if (ts_input is not _no_input and not ( + PyDateTime_Check(ts_input) and + getattr(ts_input, 'tzinfo', None) is None)): + raise ValueError( + "Cannot pass fold with possibly unambiguous input: int, " + "float, numpy.datetime64, str, or timezone-aware " + "datetime-like. Pass naive datetime-like or build " + "Timestamp from components." + ) + + if tz is not None and treat_tz_as_pytz(tz): + raise ValueError( + "pytz timezones do not support fold. Please use dateutil " + "timezones." 
+ ) + + if hasattr(ts_input, 'fold'): + ts_input = ts_input.replace(fold=fold) + # GH 30543 if pd.Timestamp already passed, return it # check that only ts_input is passed # checking verbosely, because cython doesn't optimize @@ -419,7 +454,8 @@ class Timestamp(_Timestamp): "hour": hour or 0, "minute": minute or 0, "second": second or 0, - "microsecond": microsecond or 0 + "microsecond": microsecond or 0, + "fold": fold or 0 } if year is not None: datetime_kwargs["year"] = year @@ -435,7 +471,7 @@ class Timestamp(_Timestamp): # Timestamp(year, month, day[, hour[, minute[, second[, # microsecond[, nanosecond[, tzinfo]]]]]]) ts_input = datetime(ts_input, freq, tz, unit or 0, - year or 0, month or 0, day or 0) + year or 0, month or 0, day or 0, fold=fold or 0) nanosecond = hour tz = minute freq = None @@ -455,7 +491,7 @@ class Timestamp(_Timestamp): elif not is_offset_object(freq): freq = to_offset(freq) - return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq) + return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold) def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): if self.tz is not None: @@ -999,7 +1035,7 @@ default 'raise' if value != NPY_NAT: check_dts_bounds(&dts) - return create_timestamp_from_ts(value, dts, _tzinfo, self.freq) + return create_timestamp_from_ts(value, dts, _tzinfo, self.freq, fold) def isoformat(self, sep='T'): base = super(_Timestamp, self).isoformat(sep=sep) diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index 9c86057b0a392..c1dd88e5b2313 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -4,4 +4,5 @@ from numpy cimport int64_t cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz) cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=*) +cdef int64_t _tz_convert_tzlocal_fromutc(int64_t val, tzinfo tz, bint *fold) cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index b368f0fde3edc..a9702f91107ec 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -444,12 +444,12 @@ cdef int64_t[:] _tz_convert_one_way(int64_t[:] vals, object tz, bint to_utc): return converted -cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): +cdef inline int64_t _tzlocal_get_offset_components(int64_t val, tzinfo tz, + bint to_utc, + bint *fold=NULL): """ - Convert the i8 representation of a datetime from a tzlocal timezone to - UTC, or vice-versa. - - Private, not intended for use outside of tslibs.conversion + Calculate offset in nanoseconds needed to convert the i8 representation of + a datetime from a tzlocal timezone to UTC, or vice-versa. 
Parameters ---------- @@ -457,15 +457,22 @@ cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): tz : tzinfo to_utc : bint True if converting tzlocal _to_ UTC, False if going the other direction + fold : bint*, default NULL + pointer to fold: whether datetime ends up in a fold or not + after adjustment Returns ------- - result : int64_t + delta : int64_t + + Notes + ----- + Sets fold by pointer """ cdef: npy_datetimestruct dts - int64_t delta datetime dt + int64_t delta dt64_to_dtstruct(val, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, @@ -475,11 +482,69 @@ cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): if not to_utc: dt = dt.replace(tzinfo=tzutc()) dt = dt.astimezone(tz) - delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 - if not to_utc: + if fold is not NULL: + fold[0] = dt.fold + + return int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 + + +cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): + """ + Convert the i8 representation of a datetime from a tzlocal timezone to + UTC, or vice-versa. + + Private, not intended for use outside of tslibs.conversion + + Parameters + ---------- + val : int64_t + tz : tzinfo + to_utc : bint + True if converting tzlocal _to_ UTC, False if going the other direction + + Returns + ------- + result : int64_t + """ + cdef int64_t delta + + delta = _tzlocal_get_offset_components(val, tz, to_utc, NULL) + + if to_utc: + return val - delta + else: return val + delta - return val - delta + + +cdef int64_t _tz_convert_tzlocal_fromutc(int64_t val, tzinfo tz, bint *fold): + """ + Convert the i8 representation of a datetime from UTC to local timezone, + set fold by pointer + + Private, not intended for use outside of tslibs.conversion + + Parameters + ---------- + val : int64_t + tz : tzinfo + fold : bint* + pointer to fold: whether datetime ends up in a fold or not + after adjustment + + Returns + ------- + result : int64_t + + Notes + ----- + Sets fold by pointer + """ + cdef int64_t delta + + delta = _tzlocal_get_offset_components(val, tz, False, fold) + + return val + delta @cython.boundscheck(False) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 1d1d371fcec1e..b293c008d6683 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -1,4 +1,4 @@ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from functools import partial from operator import attrgetter @@ -959,3 +959,95 @@ def test_pass_datetimeindex_to_index(self): expected = Index(rng.to_pydatetime(), dtype=object) tm.assert_numpy_array_equal(idx.values, expected.values) + + +def test_timestamp_constructor_invalid_fold_raise(): + # Test for #25057 + # Valid fold values are only [None, 0, 1] + msg = "Valid values for the fold argument are None, 0, or 1." + with pytest.raises(ValueError, match=msg): + Timestamp(123, fold=2) + + +def test_timestamp_constructor_pytz_fold_raise(): + # Test for #25057 + # pytz doesn't support fold. Check that we raise + # if fold is passed with pytz + msg = "pytz timezones do not support fold. Please use dateutil timezones." 
+ tz = pytz.timezone("Europe/London") + with pytest.raises(ValueError, match=msg): + Timestamp(datetime(2019, 10, 27, 0, 30, 0, 0), tz=tz, fold=0) + + +@pytest.mark.parametrize("fold", [0, 1]) +@pytest.mark.parametrize( + "ts_input", + [ + 1572136200000000000, + 1572136200000000000.0, + np.datetime64(1572136200000000000, "ns"), + "2019-10-27 01:30:00+01:00", + datetime(2019, 10, 27, 0, 30, 0, 0, tzinfo=timezone.utc), + ], +) +def test_timestamp_constructor_fold_conflict(ts_input, fold): + # Test for #25057 + # Check that we raise on fold conflict + msg = ( + "Cannot pass fold with possibly unambiguous input: int, float, " + "numpy.datetime64, str, or timezone-aware datetime-like. " + "Pass naive datetime-like or build Timestamp from components." + ) + with pytest.raises(ValueError, match=msg): + Timestamp(ts_input=ts_input, fold=fold) + + +@pytest.mark.parametrize("tz", ["dateutil/Europe/London", None]) +@pytest.mark.parametrize("fold", [0, 1]) +def test_timestamp_constructor_retain_fold(tz, fold): + # Test for #25057 + # Check that we retain fold + ts = pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30, tz=tz, fold=fold) + result = ts.fold + expected = fold + assert result == expected + + +@pytest.mark.parametrize("tz", ["dateutil/Europe/London"]) +@pytest.mark.parametrize( + "ts_input,fold_out", + [ + (1572136200000000000, 0), + (1572139800000000000, 1), + ("2019-10-27 01:30:00+01:00", 0), + ("2019-10-27 01:30:00+00:00", 1), + (datetime(2019, 10, 27, 1, 30, 0, 0, fold=0), 0), + (datetime(2019, 10, 27, 1, 30, 0, 0, fold=1), 1), + ], +) +def test_timestamp_constructor_infer_fold_from_value(tz, ts_input, fold_out): + # Test for #25057 + # Check that we infer fold correctly based on timestamps since utc + # or strings + ts = pd.Timestamp(ts_input, tz=tz) + result = ts.fold + expected = fold_out + assert result == expected + + +@pytest.mark.parametrize("tz", ["dateutil/Europe/London"]) +@pytest.mark.parametrize( + "ts_input,fold,value_out", + [ + (datetime(2019, 10, 27, 1, 30, 0, 0), 0, 1572136200000000000), + (datetime(2019, 10, 27, 1, 30, 0, 0), 1, 1572139800000000000), + ], +) +def test_timestamp_constructor_adjust_value_for_fold(tz, ts_input, fold, value_out): + # Test for #25057 + # Check that we adjust value for fold correctly + # based on timestamps since utc + ts = pd.Timestamp(ts_input, tz=tz, fold=fold) + result = ts.value + expected = value_out + assert result == expected diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 6537f6ccd8432..cfa7da810ada1 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -140,7 +140,7 @@ def test_tz_localize_ambiguous_compat(self): # see gh-14621 assert result_pytz.to_pydatetime().tzname() == "GMT" assert result_dateutil.to_pydatetime().tzname() == "BST" - assert str(result_pytz) != str(result_dateutil) + assert str(result_pytz) == str(result_dateutil) # 1 hour difference result_pytz = naive.tz_localize(pytz_zone, ambiguous=1) From 27f0000804072e8d65e6c7939ec09069e1b9790e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 26 Feb 2020 04:36:35 -0800 Subject: [PATCH 158/388] REGR: preserve freq in DTI/TDI outer join (#32166) --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/core/indexes/datetimelike.py | 17 +++++++---------- pandas/tests/indexes/datetimes/test_join.py | 13 +++++++++++++ pandas/tests/indexes/timedeltas/test_join.py | 12 ++++++++++++ 4 files changed, 33 insertions(+), 10 deletions(-) diff --git 
a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index f491774991090..e91bab0925bf7 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -23,6 +23,7 @@ Fixed regressions - Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`). - Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) - Fixed regression in :meth:`GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`) +- Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 349b582de4358..72a2aba2d8a88 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -910,17 +910,14 @@ def _is_convertible_to_index_for_join(cls, other: Index) -> bool: return True return False - def _wrap_joined_index(self, joined, other): + def _wrap_joined_index(self, joined: np.ndarray, other): + assert other.dtype == self.dtype, (other.dtype, self.dtype) name = get_op_result_name(self, other) - if self._can_fast_union(other): - joined = self._shallow_copy(joined) - joined.name = name - return joined - else: - kwargs = {} - if hasattr(self, "tz"): - kwargs["tz"] = getattr(other, "tz", None) - return type(self)._simple_new(joined, name, **kwargs) + + freq = self.freq if self._can_fast_union(other) else None + new_data = type(self._data)._simple_new(joined, dtype=self.dtype, freq=freq) + + return type(self)._simple_new(new_data, name=name) # -------------------------------------------------------------------- # List-Like Methods diff --git a/pandas/tests/indexes/datetimes/test_join.py b/pandas/tests/indexes/datetimes/test_join.py index e4d6958dbd3d8..f2f88fd7dc90c 100644 --- a/pandas/tests/indexes/datetimes/test_join.py +++ b/pandas/tests/indexes/datetimes/test_join.py @@ -129,3 +129,16 @@ def test_naive_aware_conflicts(self): with pytest.raises(TypeError, match=msg): aware.join(naive) + + @pytest.mark.parametrize("tz", [None, "US/Pacific"]) + def test_join_preserves_freq(self, tz): + # GH#32157 + dti = date_range("2016-01-01", periods=10, tz=tz) + result = dti[:5].join(dti[5:], how="outer") + assert result.freq == dti.freq + tm.assert_index_equal(result, dti) + + result = dti[:5].join(dti[6:], how="outer") + assert result.freq is None + expected = dti.delete(5) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_join.py b/pandas/tests/indexes/timedeltas/test_join.py index 3e73ed35dae96..aaf4ef29e162b 100644 --- a/pandas/tests/indexes/timedeltas/test_join.py +++ b/pandas/tests/indexes/timedeltas/test_join.py @@ -35,3 +35,15 @@ def test_does_not_convert_mixed_integer(self): assert cols.dtype == np.dtype("O") assert cols.dtype == joined.dtype tm.assert_index_equal(cols, joined) + + def test_join_preserves_freq(self): + # GH#32157 + tdi = timedelta_range("1 day", periods=10) + result = tdi[:5].join(tdi[5:], how="outer") + assert result.freq == tdi.freq + tm.assert_index_equal(result, tdi) + + result = tdi[:5].join(tdi[6:], how="outer") + assert result.freq is None + expected = tdi.delete(5) + tm.assert_index_equal(result, expected) From 35537dddab75f82b83d4dc05f0f42404f459673a Mon Sep 17 00:00:00 2001 From: Daniel Saxton 
<2658661+dsaxton@users.noreply.github.com> Date: Wed, 26 Feb 2020 06:39:55 -0600 Subject: [PATCH 159/388] BUG: Cast pd.NA to pd.NaT in to_datetime (#32214) --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/_libs/tslibs/nattype.pyx | 4 +++- pandas/tests/indexes/datetimes/test_tools.py | 7 +++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index e91bab0925bf7..1b6098e6b6ac1 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -63,6 +63,7 @@ Bug fixes **Datetimelike** - Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with a tz-aware index (:issue:`26683`) +- Bug where :func:`to_datetime` would raise when passed ``pd.NA`` (:issue:`32213`) **Categorical** diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 9f6f401a1a5f5..68a25d0cc481a 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -22,6 +22,8 @@ from pandas._libs.tslibs.util cimport ( get_nat, is_integer_object, is_float_object, is_datetime64_object, is_timedelta64_object) +from pandas._libs.missing cimport C_NA + # ---------------------------------------------------------------------- # Constants @@ -763,7 +765,7 @@ NaT = c_NaT # Python-visible cdef inline bint checknull_with_nat(object val): """ utility to check if a value is a nat or not """ - return val is None or util.is_nan(val) or val is c_NaT + return val is None or util.is_nan(val) or val is c_NaT or val is C_NA cpdef bint is_null_datetimelike(object val, bint inat_is_null=True): diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 13723f6455bff..e1f04d3d4489b 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -2315,3 +2315,10 @@ def test_nullable_integer_to_datetime(): tm.assert_series_equal(res, expected) # Check that ser isn't mutated tm.assert_series_equal(ser, ser_copy) + + +@pytest.mark.parametrize("klass", [np.array, list]) +def test_na_to_datetime(nulls_fixture, klass): + result = pd.to_datetime(klass([nulls_fixture])) + + assert result[0] is pd.NaT From 95352463938b1ae63fe02bbcac123a7a86002422 Mon Sep 17 00:00:00 2001 From: Dom Date: Wed, 26 Feb 2020 12:43:53 +0000 Subject: [PATCH 160/388] Added message to pytest raises for test_constructor_dict (#32236) --- pandas/tests/frame/test_constructors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 8c9b7cd060059..14162bc433317 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -316,7 +316,8 @@ def test_constructor_dict(self): # mix dict and array, wrong size - no spec for which error should raise # first - with pytest.raises(ValueError): + msg = "Mixing dicts with non-Series may lead to ambiguous ordering." 
+ with pytest.raises(ValueError, match=msg): DataFrame({"A": {"a": "a", "b": "b"}, "B": ["a", "b", "c"]}) # Length-one dict micro-optimization From e39cd30f98ee0ec3fd662059c39e77f5e16f41fa Mon Sep 17 00:00:00 2001 From: Ram Rachum Date: Wed, 26 Feb 2020 14:44:50 +0200 Subject: [PATCH 161/388] Fix exception causes in 14 modules (#32235) --- pandas/io/sql.py | 10 ++++---- pandas/io/stata.py | 24 ++++++++++---------- pandas/tests/extension/arrow/arrays.py | 4 ++-- pandas/tests/extension/decimal/array.py | 6 +++-- pandas/tests/extension/json/array.py | 8 +++---- pandas/tests/extension/list/array.py | 8 +++---- pandas/tests/indexes/datetimes/test_tools.py | 4 ++-- pandas/tests/io/test_sql.py | 16 ++++++------- pandas/tests/reshape/merge/test_join.py | 8 +++---- pandas/tests/reshape/test_concat.py | 4 ++-- pandas/tests/tseries/offsets/common.py | 4 ++-- pandas/tseries/frequencies.py | 12 +++++----- pandas/tseries/offsets.py | 8 +++---- pandas/util/_tester.py | 8 +++---- 14 files changed, 63 insertions(+), 61 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index e97872d880dee..9a53e7cd241e1 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -238,8 +238,8 @@ def read_sql_table( meta = MetaData(con, schema=schema) try: meta.reflect(only=[table_name], views=True) - except sqlalchemy.exc.InvalidRequestError: - raise ValueError(f"Table {table_name} not found") + except sqlalchemy.exc.InvalidRequestError as err: + raise ValueError(f"Table {table_name} not found") from err pandas_sql = SQLDatabase(con, meta=meta) table = pandas_sql.read_table( @@ -685,7 +685,7 @@ def insert_data(self): try: temp.reset_index(inplace=True) except ValueError as err: - raise ValueError(f"duplicate name in index/columns: {err}") + raise ValueError(f"duplicate name in index/columns: {err}") from err else: temp = self.frame @@ -1387,8 +1387,8 @@ def _create_sql_schema(self, frame, table_name, keys=None, dtype=None): def _get_unicode_name(name): try: uname = str(name).encode("utf-8", "strict").decode("utf-8") - except UnicodeError: - raise ValueError(f"Cannot convert identifier to UTF-8: '{name}'") + except UnicodeError as err: + raise ValueError(f"Cannot convert identifier to UTF-8: '{name}'") from err return uname diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 593228e99477b..0397dfa923afb 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1161,8 +1161,8 @@ def f(typ: int) -> Union[int, str]: return typ try: return self.TYPE_MAP_XML[typ] - except KeyError: - raise ValueError(f"cannot convert stata types [{typ}]") + except KeyError as err: + raise ValueError(f"cannot convert stata types [{typ}]") from err typlist = [f(x) for x in raw_typlist] @@ -1171,8 +1171,8 @@ def g(typ: int) -> Union[str, np.dtype]: return str(typ) try: return self.DTYPE_MAP_XML[typ] - except KeyError: - raise ValueError(f"cannot convert stata dtype [{typ}]") + except KeyError as err: + raise ValueError(f"cannot convert stata dtype [{typ}]") from err dtyplist = [g(x) for x in raw_typlist] @@ -1296,14 +1296,14 @@ def _read_old_header(self, first_char: bytes) -> None: try: self.typlist = [self.TYPE_MAP[typ] for typ in typlist] - except ValueError: + except ValueError as err: invalid_types = ",".join(str(x) for x in typlist) - raise ValueError(f"cannot convert stata types [{invalid_types}]") + raise ValueError(f"cannot convert stata types [{invalid_types}]") from err try: self.dtyplist = [self.DTYPE_MAP[typ] for typ in typlist] - except ValueError: + except ValueError as err: invalid_dtypes = ",".join(str(x) for x in 
typlist) - raise ValueError(f"cannot convert stata dtypes [{invalid_dtypes}]") + raise ValueError(f"cannot convert stata dtypes [{invalid_dtypes}]") from err if self.format_version > 108: self.varlist = [ @@ -1761,7 +1761,7 @@ def _do_convert_categoricals( categories.append(category) # Partially labeled try: cat_data.categories = categories - except ValueError: + except ValueError as err: vc = Series(categories).value_counts() repeated_cats = list(vc.index[vc > 1]) repeats = "-" * 80 + "\n" + "\n".join(repeated_cats) @@ -1777,7 +1777,7 @@ def _do_convert_categoricals( The repeated labels are: {repeats} """ - raise ValueError(msg) + raise ValueError(msg) from err # TODO: is the next line needed above in the data(...) method? cat_series = Series(cat_data, index=data.index) cat_converted_data.append((col, cat_series)) @@ -3143,11 +3143,11 @@ def _write_variable_labels(self) -> None: raise ValueError("Variable labels must be 80 characters or fewer") try: encoded = label.encode(self._encoding) - except UnicodeEncodeError: + except UnicodeEncodeError as err: raise ValueError( "Variable labels must contain only characters that " f"can be encoded in {self._encoding}" - ) + ) from err bio.write(_pad_bytes_new(encoded, vl_len + 1)) else: diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py index cd4b43c83340f..ffebc9f8b3359 100644 --- a/pandas/tests/extension/arrow/arrays.py +++ b/pandas/tests/extension/arrow/arrays.py @@ -148,8 +148,8 @@ def _reduce(self, method, skipna=True, **kwargs): try: op = getattr(arr, method) - except AttributeError: - raise TypeError + except AttributeError as err: + raise TypeError from err return op(**kwargs) def any(self, axis=0, out=None): diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 2614d8c72c342..9384ed5199c1f 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -183,8 +183,10 @@ def _reduce(self, name, skipna=True, **kwargs): try: op = getattr(self.data, name) - except AttributeError: - raise NotImplementedError(f"decimal does not support the {name} operation") + except AttributeError as err: + raise NotImplementedError( + f"decimal does not support the {name} operation" + ) from err return op(axis=0) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index a229a824d0f9b..1f026e405dc17 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -137,13 +137,13 @@ def take(self, indexer, allow_fill=False, fill_value=None): output = [ self.data[loc] if loc != -1 else fill_value for loc in indexer ] - except IndexError: - raise IndexError(msg) + except IndexError as err: + raise IndexError(msg) from err else: try: output = [self.data[loc] for loc in indexer] - except IndexError: - raise IndexError(msg) + except IndexError as err: + raise IndexError(msg) from err return self._from_sequence(output) diff --git a/pandas/tests/extension/list/array.py b/pandas/tests/extension/list/array.py index 7c1da5e8102e2..d86f90e58d897 100644 --- a/pandas/tests/extension/list/array.py +++ b/pandas/tests/extension/list/array.py @@ -86,13 +86,13 @@ def take(self, indexer, allow_fill=False, fill_value=None): output = [ self.data[loc] if loc != -1 else fill_value for loc in indexer ] - except IndexError: - raise IndexError(msg) + except IndexError as err: + raise IndexError(msg) from err else: try: output = [self.data[loc] for loc in indexer] - except IndexError: - raise 
IndexError(msg) + except IndexError as err: + raise IndexError(msg) from err return self._from_sequence(output) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index e1f04d3d4489b..0a774e9c0f008 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1110,8 +1110,8 @@ def test_unit(self, cache): for val in ["foo", Timestamp("20130101")]: try: to_datetime(val, errors="raise", unit="s", cache=cache) - except tslib.OutOfBoundsDatetime: - raise AssertionError("incorrect exception raised") + except tslib.OutOfBoundsDatetime as err: + raise AssertionError("incorrect exception raised") from err except ValueError: pass diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 0ad9f2c1e941f..fc3876eee9d66 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2575,19 +2575,19 @@ def setup_class(cls): pymysql.connect(host="localhost", user="root", passwd="", db="pandas_nosetest") try: pymysql.connect(read_default_group="pandas") - except pymysql.ProgrammingError: + except pymysql.ProgrammingError as err: raise RuntimeError( "Create a group of connection parameters under the heading " "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf." - ) - except pymysql.Error: + ) from err + except pymysql.Error as err: raise RuntimeError( "Cannot connect to database. " "Create a group of connection parameters under the heading " "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf." - ) + ) from err @pytest.fixture(autouse=True) def setup_method(self, request, datapath): @@ -2595,19 +2595,19 @@ def setup_method(self, request, datapath): pymysql.connect(host="localhost", user="root", passwd="", db="pandas_nosetest") try: pymysql.connect(read_default_group="pandas") - except pymysql.ProgrammingError: + except pymysql.ProgrammingError as err: raise RuntimeError( "Create a group of connection parameters under the heading " "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf." - ) - except pymysql.Error: + ) from err + except pymysql.Error as err: raise RuntimeError( "Cannot connect to database. " "Create a group of connection parameters under the heading " "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf." 
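                # Editorial aside, not part of the patch: ``raise ... from err``
                # (added just below) stores the original pymysql error as
                # ``__cause__``, so tracebacks read "The above exception was the
                # direct cause of the following exception:" rather than the
                # implicit "During handling of the above exception, another
                # exception occurred:", which is the point of this commit.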
- ) + ) from err self.method = request.function diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 685995ee201f8..725157b7c8523 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -810,11 +810,11 @@ def _check_join(left, right, result, join_col, how="left", lsuffix="_x", rsuffix try: lgroup = left_grouped.get_group(group_key) - except KeyError: + except KeyError as err: if how in ("left", "inner"): raise AssertionError( f"key {group_key} should not have been in the join" - ) + ) from err _assert_all_na(l_joined, left.columns, join_col) else: @@ -822,11 +822,11 @@ def _check_join(left, right, result, join_col, how="left", lsuffix="_x", rsuffix try: rgroup = right_grouped.get_group(group_key) - except KeyError: + except KeyError as err: if how in ("right", "inner"): raise AssertionError( f"key {group_key} should not have been in the join" - ) + ) from err _assert_all_na(r_joined, right.columns, join_col) else: diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 5811f3bc196a1..afd8f4178f741 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1849,8 +1849,8 @@ def __len__(self) -> int: def __getitem__(self, index): try: return {0: df1, 1: df2}[index] - except KeyError: - raise IndexError + except KeyError as err: + raise IndexError from err tm.assert_frame_equal(pd.concat(CustomIterator1(), ignore_index=True), expected) diff --git a/pandas/tests/tseries/offsets/common.py b/pandas/tests/tseries/offsets/common.py index 71953fd095882..25837c0b6aee2 100644 --- a/pandas/tests/tseries/offsets/common.py +++ b/pandas/tests/tseries/offsets/common.py @@ -11,11 +11,11 @@ def assert_offset_equal(offset, base, expected): assert actual == expected assert actual_swapped == expected assert actual_apply == expected - except AssertionError: + except AssertionError as err: raise AssertionError( f"\nExpected: {expected}\nActual: {actual}\nFor Offset: {offset})" f"\nAt Date: {base}" - ) + ) from err def assert_is_on_offset(offset, date, expected): diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index af34180fb3170..1a1b7e8e1bd08 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -141,8 +141,8 @@ def to_offset(freq) -> Optional[DateOffset]: delta = offset else: delta = delta + offset - except ValueError: - raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq)) + except ValueError as err: + raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq)) from err else: delta = None @@ -173,8 +173,8 @@ def to_offset(freq) -> Optional[DateOffset]: delta = offset else: delta = delta + offset - except (ValueError, TypeError): - raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq)) + except (ValueError, TypeError) as err: + raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq)) from err if delta is None: raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq)) @@ -223,9 +223,9 @@ def _get_offset(name: str) -> DateOffset: # handles case where there's no suffix (and will TypeError if too # many '-') offset = klass._from_name(*split[1:]) - except (ValueError, TypeError, KeyError): + except (ValueError, TypeError, KeyError) as err: # bad prefix or suffix - raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(name)) + raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(name)) from err # cache _offset_map[name] = offset diff --git a/pandas/tseries/offsets.py 
b/pandas/tseries/offsets.py index 959dd19a50d90..b6bbe008812cb 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -2530,12 +2530,12 @@ def _tick_comp(op): def f(self, other): try: return op(self.delta, other.delta) - except AttributeError: + except AttributeError as err: # comparing with a non-Tick object raise TypeError( f"Invalid comparison between {type(self).__name__} " f"and {type(other).__name__}" - ) + ) from err f.__name__ = f"__{op.__name__}__" return f @@ -2570,10 +2570,10 @@ def __add__(self, other): return self.apply(other) except ApplyTypeError: return NotImplemented - except OverflowError: + except OverflowError as err: raise OverflowError( f"the add operation between {self} and {other} will overflow" - ) + ) from err def __eq__(self, other: Any) -> bool: if isinstance(other, str): diff --git a/pandas/util/_tester.py b/pandas/util/_tester.py index b299f3790ab22..1bdf0d8483c76 100644 --- a/pandas/util/_tester.py +++ b/pandas/util/_tester.py @@ -10,12 +10,12 @@ def test(extra_args=None): try: import pytest - except ImportError: - raise ImportError("Need pytest>=5.0.1 to run tests") + except ImportError as err: + raise ImportError("Need pytest>=5.0.1 to run tests") from err try: import hypothesis # noqa - except ImportError: - raise ImportError("Need hypothesis>=3.58 to run tests") + except ImportError as err: + raise ImportError("Need hypothesis>=3.58 to run tests") from err cmd = ["--skip-slow", "--skip-network", "--skip-db"] if extra_args: if not isinstance(extra_args, list): From e6bd49f4f27d2a9353aee04d326d911fb843596a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 26 Feb 2020 04:53:45 -0800 Subject: [PATCH 162/388] use numexpr for Series comparisons (#32047) --- pandas/core/ops/__init__.py | 3 ++- pandas/core/ops/array_ops.py | 30 +++++++++++++++---------- pandas/tests/arithmetic/test_numeric.py | 3 +-- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 1ee4c8e85be6b..d0adf2da04db3 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -510,6 +510,7 @@ def _comp_method_SERIES(cls, op, special): Wrapper function for Series arithmetic operations, to avoid code duplication. """ + str_rep = _get_opstr(op) op_name = _get_op_name(op, special) @unpack_zerodim_and_defer(op_name) @@ -523,7 +524,7 @@ def wrapper(self, other): lvalues = extract_array(self, extract_numpy=True) rvalues = extract_array(other, extract_numpy=True) - res_values = comparison_op(lvalues, rvalues, op) + res_values = comparison_op(lvalues, rvalues, op, str_rep) return _construct_result(self, res_values, index=self.index, name=res_name) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 10e3f32de3958..b216a927f65b3 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -126,7 +126,7 @@ def na_op(x, y): return na_op -def na_arithmetic_op(left, right, op, str_rep: str): +def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = False): """ Return the result of evaluating op on the passed in values. @@ -137,6 +137,8 @@ def na_arithmetic_op(left, right, op, str_rep: str): left : np.ndarray right : np.ndarray or scalar str_rep : str or None + is_cmp : bool, default False + If this a comparison operation. 
Returns ------- @@ -151,8 +153,18 @@ def na_arithmetic_op(left, right, op, str_rep: str): try: result = expressions.evaluate(op, str_rep, left, right) except TypeError: + if is_cmp: + # numexpr failed on comparison op, e.g. ndarray[float] > datetime + # In this case we do not fall back to the masked op, as that + # will handle complex numbers incorrectly, see GH#32047 + raise result = masked_arith_op(left, right, op) + if is_cmp and (is_scalar(result) or result is NotImplemented): + # numpy returned a scalar instead of operating element-wise + # e.g. numeric array vs str + return invalid_comparison(left, right, op) + return missing.dispatch_fill_zeros(op, left, right, result) @@ -199,7 +211,9 @@ def arithmetic_op(left: ArrayLike, right: Any, op, str_rep: str): return res_values -def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike: +def comparison_op( + left: ArrayLike, right: Any, op, str_rep: Optional[str] = None, +) -> ArrayLike: """ Evaluate a comparison operation `=`, `!=`, `>=`, `>`, `<=`, or `<`. @@ -244,16 +258,8 @@ def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike: res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues) else: - op_name = f"__{op.__name__}__" - method = getattr(lvalues, op_name) with np.errstate(all="ignore"): - res_values = method(rvalues) - - if res_values is NotImplemented: - res_values = invalid_comparison(lvalues, rvalues, op) - if is_scalar(res_values): - typ = type(rvalues) - raise TypeError(f"Could not compare {typ} type with Series") + res_values = na_arithmetic_op(lvalues, rvalues, op, str_rep, is_cmp=True) return res_values @@ -380,7 +386,7 @@ def get_array_op(op, str_rep: Optional[str] = None): """ op_name = op.__name__.strip("_") if op_name in {"eq", "ne", "lt", "le", "gt", "ge"}: - return partial(comparison_op, op=op) + return partial(comparison_op, op=op, str_rep=str_rep) elif op_name in {"and", "or", "xor", "rand", "ror", "rxor"}: return partial(logical_op, op=op) else: diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 51d09a92773b1..d4baf2f374cdf 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -66,8 +66,7 @@ def test_df_numeric_cmp_dt64_raises(self): ts = pd.Timestamp.now() df = pd.DataFrame({"x": range(5)}) - msg = "Invalid comparison between dtype=int64 and Timestamp" - + msg = "'[<>]' not supported between instances of 'Timestamp' and 'int'" with pytest.raises(TypeError, match=msg): df > ts with pytest.raises(TypeError, match=msg): From d8115ce4dad973729ac2648acaaf3d72ba8cd453 Mon Sep 17 00:00:00 2001 From: Akbar Septriyan Date: Thu, 27 Feb 2020 03:35:41 +0700 Subject: [PATCH 163/388] DOC: Fix pandas.index.copy summary documentation (#32006) --- pandas/core/indexes/base.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c215fdb475ed8..b570243b87a93 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -796,20 +796,24 @@ def repeat(self, repeats, axis=None): def copy(self, name=None, deep=False, dtype=None, names=None): """ - Make a copy of this object. Name and dtype sets those attributes on - the new object. + Make a copy of this object. + + Name and dtype sets those attributes on the new object. Parameters ---------- - name : Label + name : Label, optional + Set name for new object. deep : bool, default False dtype : numpy dtype or pandas type, optional + Set dtype for new object. 
names : list-like, optional Kept for compatibility with MultiIndex. Should not be used. Returns ------- Index + Index refer to new object which is a copy of this object. Notes ----- From 9a8e83ae7dfe6fdb3c7d9e26cae291d9ebb1a0de Mon Sep 17 00:00:00 2001 From: sage Date: Thu, 27 Feb 2020 03:38:47 +0700 Subject: [PATCH 164/388] DOC: Improve documentation for Index.where (#32009) --- pandas/core/indexes/base.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b570243b87a93..b5e323fbd0fa4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3937,18 +3937,35 @@ def memory_usage(self, deep: bool = False) -> int: def where(self, cond, other=None): """ - Return an Index of same shape as self and whose corresponding - entries are from self where cond is True and otherwise are from - other. + Replace values where the condition is False. + + The replacement is taken from other. Parameters ---------- cond : bool array-like with the same length as self - other : scalar, or array-like + Condition to select the values on. + other : scalar, or array-like, default None + Replacement if the condition is False. Returns ------- - Index + pandas.Index + A copy of self with values replaced from other + where the condition is False. + + See Also + -------- + Series.where : Same method for Series. + DataFrame.where : Same method for DataFrame. + + Examples + -------- + >>> idx = pd.Index(['car', 'bike', 'train', 'tractor']) + >>> idx + Index(['car', 'bike', 'train', 'tractor'], dtype='object') + >>> idx.where(idx.isin(['car', 'train']), 'other') + Index(['car', 'other', 'train', 'other'], dtype='object') """ if other is None: other = self._na_value From 52a63ab42b8845725bedb21ee3d8acafc672801c Mon Sep 17 00:00:00 2001 From: Farhan Reynaldo Date: Thu, 27 Feb 2020 03:53:53 +0700 Subject: [PATCH 165/388] DOC: Fix errors in pandas.Series.argmax (#32019) --- pandas/core/base.py | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 85424e35fa0e0..b9aeb32eea5c1 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -927,22 +927,50 @@ def max(self, axis=None, skipna=True, *args, **kwargs): def argmax(self, axis=None, skipna=True, *args, **kwargs): """ - Return an ndarray of the maximum argument indexer. + Return int position of the largest value in the Series. + + If the maximum is achieved in multiple locations, + the first row position is returned. Parameters ---------- axis : {None} Dummy argument for consistency with Series. skipna : bool, default True + Exclude NA/null values when showing the result. + *args, **kwargs + Additional arguments and keywords for compatibility with NumPy. Returns ------- - numpy.ndarray - Indices of the maximum values. + int + Row position of the maximum values. See Also -------- - numpy.ndarray.argmax + numpy.ndarray.argmax : Equivalent method for numpy arrays. + Series.argmin : Similar method, but returning the minimum. + Series.idxmax : Return index label of the maximum values. + Series.idxmin : Return index label of the minimum values. + + Examples + -------- + Consider dataset containing cereal calories + + >>> s = pd.Series({'Corn Flakes': 100.0, 'Almond Delight': 110.0, + ... 
'Cinnamon Toast Crunch': 120.0, 'Cocoa Puff': 110.0}) + >>> s + Corn Flakes 100.0 + Almond Delight 110.0 + Cinnamon Toast Crunch 120.0 + Cocoa Puff 110.0 + dtype: float64 + + >>> s.argmax() + 2 + + The maximum cereal calories is in the third element, + since series is zero-indexed. """ nv.validate_minmax_axis(axis) nv.validate_argmax_with_skipna(skipna, args, kwargs) From ed7adcd51a9c1c44f57bcc6f6241956ce1bd047c Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 26 Feb 2020 21:08:04 +0000 Subject: [PATCH 166/388] CI: Remove docs build from pipelines (#32074) --- .github/workflows/ci.yml | 35 ++++++++++++++++++- azure-pipelines.yml | 75 ---------------------------------------- 2 files changed, 34 insertions(+), 76 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d87fa5203bd52..7493be34d10c7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -154,6 +154,39 @@ jobs: echo "region = BHS" >> $RCLONE_CONFIG_PATH if: github.event_name == 'push' - - name: Sync web + - name: Sync web with OVH run: rclone sync pandas_web ovh_cloud_pandas_web:dev if: github.event_name == 'push' + + - name: Create git repo to upload the built docs to GitHub pages + run: | + cd pandas_web + git init + touch .nojekyll + echo "dev.pandas.io" > CNAME + printf "User-agent: *\nDisallow: /" > robots.txt + git add --all . + git config user.email "pandas-dev@python.org" + git config user.name "pandas-bot" + git commit -m "pandas web and documentation in master" + if: github.event_name == 'push' + + # For this task to work, next steps are required: + # 1. Generate a pair of private/public keys (i.e. `ssh-keygen -t rsa -b 4096 -C "your_email@example.com"`) + # 2. Go to https://github.com/pandas-dev/pandas/settings/secrets + # 3. Click on "Add a new secret" + # 4. Name: "github_pagas_ssh_key", Value: + # 5. 
The public key needs to be upladed to https://github.com/pandas-dev/pandas-dev.github.io/settings/keys + - name: Install GitHub pages ssh deployment key + uses: shimataro/ssh-key-action@v2 + with: + key: ${{ secrets.github_pages_ssh_key }} + known_hosts: 'github.com,192.30.252.128 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ==' + if: github.event_name == 'push' + + - name: Publish web and docs to GitHub pages + run: | + cd pandas_web + git remote add origin git@github.com:pandas-dev/pandas-dev.github.io.git + git push -f origin master + if: github.event_name == 'push' diff --git a/azure-pipelines.yml b/azure-pipelines.yml index d992c64073476..42a039af46e94 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -15,78 +15,3 @@ jobs: parameters: name: Windows vmImage: vs2017-win2016 - -- job: 'Web_and_Docs' - pool: - vmImage: ubuntu-16.04 - timeoutInMinutes: 90 - steps: - - script: | - echo '##vso[task.setvariable variable=ENV_FILE]environment.yml' - echo '##vso[task.prependpath]$(HOME)/miniconda3/bin' - displayName: 'Setting environment variables' - - - script: | - sudo apt-get install -y libc6-dev-i386 - ci/setup_env.sh - displayName: 'Setup environment and build pandas' - - - script: | - source activate pandas-dev - python web/pandas_web.py web/pandas --target-path=web/build - displayName: 'Build website' - - - script: | - source activate pandas-dev - # Next we should simply have `doc/make.py --warnings-are-errors`, everything else is required because the ipython directive doesn't fail the build on errors (https://github.com/ipython/ipython/issues/11547) - doc/make.py --warnings-are-errors | tee sphinx.log ; SPHINX_RET=${PIPESTATUS[0]} - grep -B1 "^<<<-------------------------------------------------------------------------$" sphinx.log ; IPY_RET=$(( $? != 1 )) - exit $(( $SPHINX_RET + $IPY_RET )) - displayName: 'Build documentation' - - - script: | - mkdir -p to_deploy/docs - cp -r web/build/* to_deploy/ - cp -r doc/build/html/* to_deploy/docs/ - displayName: 'Merge website and docs' - - - script: | - cd to_deploy - git init - touch .nojekyll - echo "dev.pandas.io" > CNAME - printf "User-agent: *\nDisallow: /" > robots.txt - git add --all . - git config user.email "pandas-dev@python.org" - git config user.name "pandas-bot" - git commit -m "pandas web and documentation in master" - displayName: 'Create git repo for docs build' - condition : | - and(not(eq(variables['Build.Reason'], 'PullRequest')), - eq(variables['Build.SourceBranch'], 'refs/heads/master')) - - # For `InstallSSHKey@0` to work, next steps are required: - # 1. Generate a pair of private/public keys (i.e. `ssh-keygen -t rsa -b 4096 -C "your_email@example.com"`) - # 2. Go to "Library > Secure files" in the Azure Pipelines dashboard: https://dev.azure.com/pandas-dev/pandas/_library?itemType=SecureFiles - # 3. Click on "+ Secure file" - # 4. Upload the private key (the name of the file must match with the specified in "sshKeySecureFile" input below, "pandas_docs_key") - # 5. Click on file name after it is created, tick the box "Authorize for use in all pipelines" and save - # 6. 
The public key specified in "sshPublicKey" is the pair of the uploaded private key, and needs to be set as a deploy key of the repo where the docs will be pushed (with write access): https://github.com/pandas-dev/pandas-dev.github.io/settings/keys - - task: InstallSSHKey@0 - inputs: - hostName: 'github.com,192.30.252.128 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ==' - sshPublicKey: 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDHmz3l/EdqrgNxEUKkwDUuUcLv91unig03pYFGO/DMIgCmPdMG96zAgfnESd837Rm0wSSqylwSzkRJt5MV/TpFlcVifDLDQmUhqCeO8Z6dLl/oe35UKmyYICVwcvQTAaHNnYRpKC5IUlTh0JEtw9fGlnp1Ta7U1ENBLbKdpywczElhZu+hOQ892zqOj3CwA+U2329/d6cd7YnqIKoFN9DWT3kS5K6JE4IoBfQEVekIOs23bKjNLvPoOmi6CroAhu/K8j+NCWQjge5eJf2x/yTnIIP1PlEcXoHIr8io517posIx3TBup+CN8bNS1PpDW3jyD3ttl1uoBudjOQrobNnJeR6Rn67DRkG6IhSwr3BWj8alwUG5mTdZzwV5Pa9KZFdIiqX7NoDGg+itsR39QCn0thK8lGRNSR8KrWC1PSjecwelKBO7uQ7rnk/rkrZdBWR4oEA8YgNH8tirUw5WfOr5a0AIaJicKxGKNdMxZt+zmC+bS7F4YCOGIm9KHa43RrKhoGRhRf9fHHHKUPwFGqtWG4ykcUgoamDOURJyepesBAO3FiRE9rLU6ILbB3yEqqoekborHmAJD5vf7PWItW3Q/YQKuk3kkqRcKnexPyzyyq5lUgTi8CxxZdaASIOu294wjBhhdyHlXEkVTNJ9JKkj/obF+XiIIp0cBDsOXY9hDQ== pandas-dev@python.org' - sshKeySecureFile: 'pandas_docs_key' - displayName: 'Install GitHub ssh deployment key' - condition : | - and(not(eq(variables['Build.Reason'], 'PullRequest')), - eq(variables['Build.SourceBranch'], 'refs/heads/master')) - - - script: | - cd to_deploy - git remote add origin git@github.com:pandas-dev/pandas-dev.github.io.git - git push -f origin master - displayName: 'Publish web and docs to GitHub pages' - condition : | - and(not(eq(variables['Build.Reason'], 'PullRequest')), - eq(variables['Build.SourceBranch'], 'refs/heads/master')) From 2c060b449bffb1d2dd06c7ee26f548787d1fa2a9 Mon Sep 17 00:00:00 2001 From: RaisaDZ <34237447+RaisaDZ@users.noreply.github.com> Date: Thu, 27 Feb 2020 01:15:49 +0000 Subject: [PATCH 167/388] DOC: add missing links to introduction to pandas (#32198) --- doc/source/getting_started/10min.rst | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/doc/source/getting_started/10min.rst b/doc/source/getting_started/10min.rst index a635b5656bd2d..9994287c827e3 100644 --- a/doc/source/getting_started/10min.rst +++ b/doc/source/getting_started/10min.rst @@ -39,7 +39,7 @@ and labeled columns: df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD')) df -Creating a ``DataFrame`` by passing a dict of objects that can be converted to series-like. +Creating a :class:`DataFrame` by passing a dict of objects that can be converted to series-like. .. ipython:: python @@ -51,7 +51,7 @@ Creating a ``DataFrame`` by passing a dict of objects that can be converted to s 'F': 'foo'}) df2 -The columns of the resulting ``DataFrame`` have different +The columns of the resulting :class:`DataFrame` have different :ref:`dtypes `. .. ipython:: python @@ -169,7 +169,7 @@ See the indexing documentation :ref:`Indexing and Selecting Data ` and Getting ~~~~~~~ -Selecting a single column, which yields a ``Series``, +Selecting a single column, which yields a :class:`Series`, equivalent to ``df.A``: .. 
ipython:: python @@ -469,10 +469,10 @@ Concatenating pandas objects together with :func:`concat`: pd.concat(pieces) .. note:: - Adding a column to a ``DataFrame`` is relatively fast. However, adding + Adding a column to a :class:`DataFrame` is relatively fast. However, adding a row requires a copy, and may be expensive. We recommend passing a - pre-built list of records to the ``DataFrame`` constructor instead - of building a ``DataFrame`` by iteratively appending records to it. + pre-built list of records to the :class:`DataFrame` constructor instead + of building a :class:`DataFrame` by iteratively appending records to it. See :ref:`Appending to dataframe ` for more. Join @@ -520,7 +520,7 @@ See the :ref:`Grouping section `. 'D': np.random.randn(8)}) df -Grouping and then applying the :meth:`~DataFrame.sum` function to the resulting +Grouping and then applying the :meth:`~pandas.core.groupby.GroupBy.sum` function to the resulting groups. .. ipython:: python @@ -528,7 +528,7 @@ groups. df.groupby('A').sum() Grouping by multiple columns forms a hierarchical index, and again we can -apply the ``sum`` function. +apply the :meth:`~pandas.core.groupby.GroupBy.sum` function. .. ipython:: python @@ -648,7 +648,7 @@ the quarter end: Categoricals ------------ -pandas can include categorical data in a ``DataFrame``. For full docs, see the +pandas can include categorical data in a :class:`DataFrame`. For full docs, see the :ref:`categorical introduction ` and the :ref:`API documentation `. .. ipython:: python @@ -664,14 +664,13 @@ Convert the raw grades to a categorical data type. df["grade"] Rename the categories to more meaningful names (assigning to -``Series.cat.categories`` is inplace!). +:meth:`Series.cat.categories` is inplace!). .. ipython:: python df["grade"].cat.categories = ["very good", "good", "very bad"] -Reorder the categories and simultaneously add the missing categories (methods under ``Series -.cat`` return a new ``Series`` by default). +Reorder the categories and simultaneously add the missing categories (methods under :meth:`Series.cat` return a new :class:`Series` by default). .. 
ipython:: python From 86219708e5688c9cbe473c1af45a37f3cac82828 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 26 Feb 2020 20:44:57 -0800 Subject: [PATCH 168/388] TST: move misplaced to_datetime test (#32273) --- pandas/tests/indexes/datetimes/test_tools.py | 82 ++++++++++++++++++- pandas/tests/series/test_timeseries.py | 84 +------------------- 2 files changed, 82 insertions(+), 84 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 0a774e9c0f008..ecfecfb414326 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -2,7 +2,7 @@ import calendar from collections import deque -from datetime import datetime, time +from datetime import datetime, time, timedelta import locale from dateutil.parser import parse @@ -1376,6 +1376,86 @@ def test_to_datetime_errors_ignore_utc_true(self): expected = DatetimeIndex(["1970-01-01 00:00:01"], tz="UTC") tm.assert_index_equal(result, expected) + # TODO: this is moved from tests.series.test_timeseries, may be redundant + def test_to_datetime_unit(self): + + epoch = 1370745748 + s = Series([epoch + t for t in range(20)]) + result = to_datetime(s, unit="s") + expected = Series( + [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] + ) + tm.assert_series_equal(result, expected) + + s = Series([epoch + t for t in range(20)]).astype(float) + result = to_datetime(s, unit="s") + expected = Series( + [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] + ) + tm.assert_series_equal(result, expected) + + s = Series([epoch + t for t in range(20)] + [iNaT]) + result = to_datetime(s, unit="s") + expected = Series( + [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] + + [NaT] + ) + tm.assert_series_equal(result, expected) + + s = Series([epoch + t for t in range(20)] + [iNaT]).astype(float) + result = to_datetime(s, unit="s") + expected = Series( + [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] + + [NaT] + ) + tm.assert_series_equal(result, expected) + + # GH13834 + s = Series([epoch + t for t in np.arange(0, 2, 0.25)] + [iNaT]).astype(float) + result = to_datetime(s, unit="s") + expected = Series( + [ + Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) + for t in np.arange(0, 2, 0.25) + ] + + [NaT] + ) + tm.assert_series_equal(result, expected) + + s = pd.concat( + [Series([epoch + t for t in range(20)]).astype(float), Series([np.nan])], + ignore_index=True, + ) + result = to_datetime(s, unit="s") + expected = Series( + [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] + + [NaT] + ) + tm.assert_series_equal(result, expected) + + result = to_datetime([1, 2, "NaT", pd.NaT, np.nan], unit="D") + expected = DatetimeIndex( + [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 3 + ) + tm.assert_index_equal(result, expected) + + msg = "non convertible value foo with the unit 'D'" + with pytest.raises(ValueError, match=msg): + to_datetime([1, 2, "foo"], unit="D") + msg = "cannot convert input 111111111 with the unit 'D'" + with pytest.raises(OutOfBoundsDatetime, match=msg): + to_datetime([1, 2, 111111111], unit="D") + + # coerce we can process + expected = DatetimeIndex( + [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 1 + ) + result = to_datetime([1, 2, "foo"], unit="D", errors="coerce") + tm.assert_index_equal(result, expected) + + result = to_datetime([1, 2, 111111111], unit="D", 
errors="coerce") + tm.assert_index_equal(result, expected) + class TestToDatetimeMisc: def test_to_datetime_barely_out_of_bounds(self): diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 8f06ea69f5d66..544634a2d16e9 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -1,11 +1,10 @@ -from datetime import datetime, timedelta +from datetime import datetime from io import StringIO import numpy as np import pytest from pandas._libs.tslib import iNaT -from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime import pandas as pd from pandas import ( @@ -14,10 +13,8 @@ NaT, Series, Timestamp, - concat, date_range, timedelta_range, - to_datetime, ) import pandas._testing as tm @@ -127,85 +124,6 @@ def test_contiguous_boolean_preserve_freq(self): masked = rng[mask] assert masked.freq is None - def test_to_datetime_unit(self): - - epoch = 1370745748 - s = Series([epoch + t for t in range(20)]) - result = to_datetime(s, unit="s") - expected = Series( - [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] - ) - tm.assert_series_equal(result, expected) - - s = Series([epoch + t for t in range(20)]).astype(float) - result = to_datetime(s, unit="s") - expected = Series( - [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] - ) - tm.assert_series_equal(result, expected) - - s = Series([epoch + t for t in range(20)] + [iNaT]) - result = to_datetime(s, unit="s") - expected = Series( - [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] - + [NaT] - ) - tm.assert_series_equal(result, expected) - - s = Series([epoch + t for t in range(20)] + [iNaT]).astype(float) - result = to_datetime(s, unit="s") - expected = Series( - [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] - + [NaT] - ) - tm.assert_series_equal(result, expected) - - # GH13834 - s = Series([epoch + t for t in np.arange(0, 2, 0.25)] + [iNaT]).astype(float) - result = to_datetime(s, unit="s") - expected = Series( - [ - Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) - for t in np.arange(0, 2, 0.25) - ] - + [NaT] - ) - tm.assert_series_equal(result, expected) - - s = concat( - [Series([epoch + t for t in range(20)]).astype(float), Series([np.nan])], - ignore_index=True, - ) - result = to_datetime(s, unit="s") - expected = Series( - [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] - + [NaT] - ) - tm.assert_series_equal(result, expected) - - result = to_datetime([1, 2, "NaT", pd.NaT, np.nan], unit="D") - expected = DatetimeIndex( - [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 3 - ) - tm.assert_index_equal(result, expected) - - msg = "non convertible value foo with the unit 'D'" - with pytest.raises(ValueError, match=msg): - to_datetime([1, 2, "foo"], unit="D") - msg = "cannot convert input 111111111 with the unit 'D'" - with pytest.raises(OutOfBoundsDatetime, match=msg): - to_datetime([1, 2, 111111111], unit="D") - - # coerce we can process - expected = DatetimeIndex( - [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 1 - ) - result = to_datetime([1, 2, "foo"], unit="D", errors="coerce") - tm.assert_index_equal(result, expected) - - result = to_datetime([1, 2, 111111111], unit="D", errors="coerce") - tm.assert_index_equal(result, expected) - def test_series_ctor_datetime64(self): rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s") dates = np.asarray(rng) From 
97c0ce962b4e05a2dc8464640b5083df27ea98a1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 03:47:24 -0800 Subject: [PATCH 169/388] Troubleshoot CI (#32284) --- pandas/tests/frame/test_constructors.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 14162bc433317..72fc0010cbbce 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -9,6 +9,7 @@ import pytest from pandas.compat import is_platform_little_endian +from pandas.compat.numpy import _is_numpy_dev from pandas.core.dtypes.common import is_integer_dtype @@ -144,6 +145,7 @@ def test_constructor_dtype_list_data(self): assert df.loc[1, 0] is None assert df.loc[0, 1] == "2" + @pytest.mark.xfail(_is_numpy_dev, reason="Interprets list of frame as 3D") def test_constructor_list_frames(self): # see gh-3243 result = DataFrame([DataFrame()]) @@ -503,6 +505,7 @@ def test_constructor_error_msgs(self): with pytest.raises(ValueError, match=msg): DataFrame({"a": False, "b": True}) + @pytest.mark.xfail(_is_numpy_dev, reason="Interprets embedded frame as 3D") def test_constructor_with_embedded_frames(self): # embedded data frames From eb6f8d3c25790a07c3daee8243c8fb4372a5c423 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 04:35:34 -0800 Subject: [PATCH 170/388] test_droplevel (#32272) --- pandas/tests/frame/methods/test_droplevel.py | 23 +++++++++++++++++++ pandas/tests/frame/test_alter_axes.py | 19 --------------- pandas/tests/series/methods/test_droplevel.py | 19 +++++++++++++++ pandas/tests/series/test_alter_axes.py | 13 ----------- 4 files changed, 42 insertions(+), 32 deletions(-) create mode 100644 pandas/tests/frame/methods/test_droplevel.py create mode 100644 pandas/tests/series/methods/test_droplevel.py diff --git a/pandas/tests/frame/methods/test_droplevel.py b/pandas/tests/frame/methods/test_droplevel.py new file mode 100644 index 0000000000000..517905cf23259 --- /dev/null +++ b/pandas/tests/frame/methods/test_droplevel.py @@ -0,0 +1,23 @@ +from pandas import DataFrame, Index, MultiIndex +import pandas._testing as tm + + +class TestDropLevel: + def test_droplevel(self): + # GH#20342 + df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]) + df = df.set_index([0, 1]).rename_axis(["a", "b"]) + df.columns = MultiIndex.from_tuples( + [("c", "e"), ("d", "f")], names=["level_1", "level_2"] + ) + + # test that dropping of a level in index works + expected = df.reset_index("a", drop=True) + result = df.droplevel("a", axis="index") + tm.assert_frame_equal(result, expected) + + # test that dropping of a level in columns works + expected = df.copy() + expected.columns = Index(["c", "d"], name="level_1") + result = df.droplevel("level_2", axis="columns") + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 751ed1dfdd847..34df8bb57dd91 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -840,25 +840,6 @@ def test_reindex_signature(self): "tolerance", } - def test_droplevel(self): - # GH20342 - df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]) - df = df.set_index([0, 1]).rename_axis(["a", "b"]) - df.columns = MultiIndex.from_tuples( - [("c", "e"), ("d", "f")], names=["level_1", "level_2"] - ) - - # test that dropping of a level in index works - expected = df.reset_index("a", drop=True) - result = df.droplevel("a", axis="index") - 
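        # Sketch of the behavior under test, derived from the setup above:
        # ``df.index`` is MultiIndex([(1, 2), (5, 6), (9, 10)], names=["a", "b"]),
        # so dropping level "a" leaves Index([2, 6, 10], name="b"), the same
        # index that ``reset_index("a", drop=True)`` gave ``expected``.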
tm.assert_frame_equal(result, expected) - - # test that dropping of a level in columns works - expected = df.copy() - expected.columns = Index(["c", "d"], name="level_1") - result = df.droplevel("level_2", axis="columns") - tm.assert_frame_equal(result, expected) - class TestIntervalIndex: def test_setitem(self): diff --git a/pandas/tests/series/methods/test_droplevel.py b/pandas/tests/series/methods/test_droplevel.py new file mode 100644 index 0000000000000..435eb5751de4b --- /dev/null +++ b/pandas/tests/series/methods/test_droplevel.py @@ -0,0 +1,19 @@ +import pytest + +from pandas import MultiIndex, Series +import pandas._testing as tm + + +class TestDropLevel: + def test_droplevel(self): + # GH#20342 + ser = Series([1, 2, 3, 4]) + ser.index = MultiIndex.from_arrays( + [(1, 2, 3, 4), (5, 6, 7, 8)], names=["a", "b"] + ) + expected = ser.reset_index("b", drop=True) + result = ser.droplevel("b", axis="index") + tm.assert_series_equal(result, expected) + # test that droplevel raises ValueError on axis != 0 + with pytest.raises(ValueError): + ser.droplevel(1, axis="columns") diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 9be8744d7223f..f6ca93b0c2882 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -152,16 +152,3 @@ def test_set_axis_inplace(self): for axis in [2, "foo"]: with pytest.raises(ValueError, match="No axis named"): s.set_axis(list("abcd"), axis=axis, inplace=False) - - def test_droplevel(self): - # GH20342 - ser = Series([1, 2, 3, 4]) - ser.index = MultiIndex.from_arrays( - [(1, 2, 3, 4), (5, 6, 7, 8)], names=["a", "b"] - ) - expected = ser.reset_index("b", drop=True) - result = ser.droplevel("b", axis="index") - tm.assert_series_equal(result, expected) - # test that droplevel raises ValueError on axis != 0 - with pytest.raises(ValueError): - ser.droplevel(1, axis="columns") From 66c6e8e8c642000b8dea9abc960333d0f2f7650b Mon Sep 17 00:00:00 2001 From: Jake Tae Date: Thu, 27 Feb 2020 21:39:48 +0900 Subject: [PATCH 171/388] Use defaultdict for optimization (#32278) --- pandas/core/frame.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3fc10444ee064..b6b6a4fe74ed5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8446,9 +8446,8 @@ def isin(self, values) -> "DataFrame": def _from_nested_dict(data): # TODO: this should be seriously cythonized - new_data = {} + new_data = collections.defaultdict(dict) for index, s in data.items(): for col, v in s.items(): - new_data[col] = new_data.get(col, {}) new_data[col][index] = v return new_data From 922f93200833d9c51ee78c68b0912e359e0fbc73 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 04:52:36 -0800 Subject: [PATCH 172/388] PERF: pass through to numpy validation for iloc setitem (#32257) --- pandas/core/indexing.py | 15 ++------ pandas/tests/frame/indexing/test_indexing.py | 8 +++-- pandas/tests/indexing/test_floats.py | 36 +++++++------------- 3 files changed, 22 insertions(+), 37 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 5adc65b488399..69283bc58799e 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -8,7 +8,6 @@ from pandas.util._decorators import Appender from pandas.core.dtypes.common import ( - is_float, is_integer, is_iterator, is_list_like, @@ -1500,18 +1499,10 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): """ Much simpler as we only have to deal 
with our valid types. """ - labels = self.obj._get_axis(axis) - - # make need to convert a float key - if isinstance(key, slice): - labels._validate_positional_slice(key) - return key - - elif is_float(key): - # _validate_indexer call will always raise - labels._validate_indexer("positional", key, "iloc") + return key - self._validate_key(key, axis) + def _get_setitem_indexer(self, key): + # GH#32257 Fall through to let numnpy do validation return key # ------------------------------------------------------------------- diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 636cca0df9d4e..997414eceeb86 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -27,6 +27,9 @@ from pandas.tseries.offsets import BDay +# We pass through a TypeError raised by numpy +_slice_msg = "slice indices must be integers or None or have an __index__ method" + class TestGet: def test_get(self, float_frame): @@ -994,7 +997,8 @@ def test_getitem_setitem_fancy_exceptions(self, float_frame): with pytest.raises(IndexingError, match="Too many indexers"): ix[:, :, :] - with pytest.raises(IndexingError, match="Too many indexers"): + with pytest.raises(IndexError, match="too many indices for array"): + # GH#32257 we let numpy do validation, get their exception ix[:, :, :] = 1 def test_getitem_setitem_boolean_misaligned(self, float_frame): @@ -1073,7 +1077,7 @@ def test_getitem_setitem_float_labels(self): cp = df.copy() - with pytest.raises(TypeError, match=msg): + with pytest.raises(TypeError, match=_slice_msg): cp.iloc[1.0:5] = 0 with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 87520f5ab2577..a84e88cefbced 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -1,9 +1,17 @@ +import re + import numpy as np import pytest from pandas import DataFrame, Float64Index, Index, Int64Index, RangeIndex, Series import pandas._testing as tm +# We pass through the error message from numpy +_slice_iloc_msg = re.escape( + "only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) " + "and integer or boolean arrays are valid indices" +) + def gen_obj(klass, index): if klass is Series: @@ -62,11 +70,7 @@ def test_scalar_error(self, index_func): with pytest.raises(TypeError, match=msg): s.iloc[3.0] - msg = ( - f"cannot do positional indexing on {type(i).__name__} with these " - r"indexers \[3\.0\] of type float" - ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(IndexError, match=_slice_iloc_msg): s.iloc[3.0] = 0 @pytest.mark.parametrize( @@ -133,12 +137,7 @@ def test_scalar_non_numeric(self, index_func, klass): assert 3.0 not in s # setting with a float fails with iloc - msg = ( - r"cannot do (label|positional) indexing " - fr"on {type(i).__name__} with these indexers \[3\.0\] of " - "type float" - ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(IndexError, match=_slice_iloc_msg): s.iloc[3.0] = 0 # setting with an indexer @@ -327,12 +326,7 @@ def test_scalar_float(self, klass): with pytest.raises(TypeError, match=msg): s.iloc[3.0] - msg = ( - "cannot do positional indexing " - fr"on {Float64Index.__name__} with these indexers \[3\.0\] of " - "type float" - ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(IndexError, match=_slice_iloc_msg): s2.iloc[3.0] = 0 @pytest.mark.parametrize( @@ -376,11 +370,7 @@ def 
test_slice_non_numeric(self, index_func, l, klass): idxr(s)[l] # setitem - msg = ( - "cannot do positional indexing " - fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " - "type float" - ) + msg = "slice indices must be integers or None or have an __index__ method" with pytest.raises(TypeError, match=msg): s.iloc[l] = 0 @@ -390,7 +380,7 @@ def test_slice_non_numeric(self, index_func, l, klass): r"\[(3|4)(\.0)?\] " r"of type (float|int)" ) - for idxr in [lambda x: x.loc, lambda x: x.iloc, lambda x: x]: + for idxr in [lambda x: x.loc, lambda x: x]: with pytest.raises(TypeError, match=msg): idxr(s)[l] = 0 From 736761f8fd7fef641fdd0aa4a6e8d9163126e929 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 08:10:59 -0800 Subject: [PATCH 173/388] TST: misplaced arithmetic tests (#32275) --- pandas/tests/frame/test_arithmetic.py | 33 ++++++++++ pandas/tests/frame/test_period.py | 16 ----- pandas/tests/frame/test_timezones.py | 11 ---- pandas/tests/series/test_arithmetic.py | 67 ++++++++++++++++++++- pandas/tests/series/test_datetime_values.py | 9 --- pandas/tests/series/test_timezones.py | 47 --------------- 6 files changed, 99 insertions(+), 84 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 44ad55517dcea..e4be8a979a70f 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -4,6 +4,7 @@ import numpy as np import pytest +import pytz import pandas as pd import pandas._testing as tm @@ -771,3 +772,35 @@ def test_frame_single_columns_object_sum_axis_1(): result = df.sum(axis=1) expected = pd.Series(["A", 1.2, 0]) tm.assert_series_equal(result, expected) + + +# ------------------------------------------------------------------- +# Unsorted +# These arithmetic tests were previously in other files, eventually +# should be parametrized and put into tests.arithmetic + + +class TestFrameArithmeticUnsorted: + def test_frame_add_tz_mismatch_converts_to_utc(self): + rng = pd.date_range("1/1/2011", periods=10, freq="H", tz="US/Eastern") + df = pd.DataFrame(np.random.randn(len(rng)), index=rng, columns=["a"]) + + df_moscow = df.tz_convert("Europe/Moscow") + result = df + df_moscow + assert result.index.tz is pytz.utc + + result = df_moscow + df + assert result.index.tz is pytz.utc + + def test_align_frame(self): + rng = pd.period_range("1/1/2000", "1/1/2010", freq="A") + ts = pd.DataFrame(np.random.randn(len(rng), 3), index=rng) + + result = ts + ts[::2] + expected = ts + ts + expected.values[1::2] = np.nan + tm.assert_frame_equal(result, expected) + + half = ts[::2] + result = ts + half.take(np.random.permutation(len(half))) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_period.py b/pandas/tests/frame/test_period.py index 1ce13fd31ba88..c378194b9e2b2 100644 --- a/pandas/tests/frame/test_period.py +++ b/pandas/tests/frame/test_period.py @@ -4,10 +4,6 @@ import pandas._testing as tm -def _permute(obj): - return obj.take(np.random.permutation(len(obj))) - - class TestPeriodIndex: def test_as_frame_columns(self): rng = period_range("1/1/2000", periods=5) @@ -42,15 +38,3 @@ def test_frame_index_to_string(self): # it works! 
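        # Smoke test: rendering a PeriodIndex-backed frame simply must not
        # raise. Editorial note: ``test_align_frame``, deleted just below, is
        # moved rather than dropped; the equivalent ``ts + ts[::2]`` check
        # (NaN at labels missing from the sliced operand) now lives in
        # tests/frame/test_arithmetic.py, added earlier in this same commit.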
frame.to_string() - - def test_align_frame(self): - rng = period_range("1/1/2000", "1/1/2010", freq="A") - ts = DataFrame(np.random.randn(len(rng), 3), index=rng) - - result = ts + ts[::2] - expected = ts + ts - expected.values[1::2] = np.nan - tm.assert_frame_equal(result, expected) - - result = ts + _permute(ts[::2]) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_timezones.py b/pandas/tests/frame/test_timezones.py index 62e8a4b470218..00a253d4e5ad0 100644 --- a/pandas/tests/frame/test_timezones.py +++ b/pandas/tests/frame/test_timezones.py @@ -80,17 +80,6 @@ def test_frame_join_tzaware(self): tm.assert_index_equal(result.index, ex_index) assert result.index.tz.zone == "US/Central" - def test_frame_add_tz_mismatch_converts_to_utc(self): - rng = date_range("1/1/2011", periods=10, freq="H", tz="US/Eastern") - df = DataFrame(np.random.randn(len(rng)), index=rng, columns=["a"]) - - df_moscow = df.tz_convert("Europe/Moscow") - result = df + df_moscow - assert result.index.tz is pytz.utc - - result = df_moscow + df - assert result.index.tz is pytz.utc - def test_frame_align_aware(self): idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern") idx2 = date_range("2001", periods=5, freq="2H", tz="US/Eastern") diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index f3ffdc373e178..10197766ce4a6 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -2,11 +2,12 @@ import numpy as np import pytest +import pytz from pandas._libs.tslibs import IncompatibleFrequency import pandas as pd -from pandas import Series +from pandas import Series, date_range import pandas._testing as tm @@ -203,3 +204,67 @@ def test_ser_cmp_result_names(self, names, op): ser = Series(cidx).rename(names[1]) result = op(ser, cidx) assert result.name == names[2] + + +# ------------------------------------------------------------------ +# Unsorted +# These arithmetic tests were previously in other files, eventually +# should be parametrized and put into tests.arithmetic + + +class TestTimeSeriesArithmetic: + # TODO: De-duplicate with test below + def test_series_add_tz_mismatch_converts_to_utc_duplicate(self): + rng = date_range("1/1/2011", periods=10, freq="H", tz="US/Eastern") + ser = Series(np.random.randn(len(rng)), index=rng) + + ts_moscow = ser.tz_convert("Europe/Moscow") + + result = ser + ts_moscow + assert result.index.tz is pytz.utc + + result = ts_moscow + ser + assert result.index.tz is pytz.utc + + def test_series_add_tz_mismatch_converts_to_utc(self): + rng = date_range("1/1/2011", periods=100, freq="H", tz="utc") + + perm = np.random.permutation(100)[:90] + ser1 = Series( + np.random.randn(90), index=rng.take(perm).tz_convert("US/Eastern") + ) + + perm = np.random.permutation(100)[:90] + ser2 = Series( + np.random.randn(90), index=rng.take(perm).tz_convert("Europe/Berlin") + ) + + result = ser1 + ser2 + + uts1 = ser1.tz_convert("utc") + uts2 = ser2.tz_convert("utc") + expected = uts1 + uts2 + + assert result.index.tz == pytz.UTC + tm.assert_series_equal(result, expected) + + def test_series_add_aware_naive_raises(self): + rng = date_range("1/1/2011", periods=10, freq="H") + ser = Series(np.random.randn(len(rng)), index=rng) + + ser_utc = ser.tz_localize("utc") + + with pytest.raises(Exception): + ser + ser_utc + + with pytest.raises(Exception): + ser_utc + ser + + def test_datetime_understood(self): + # Ensures it doesn't fail to create the right series + # reported in issue#16726 + series = 
pd.Series(pd.date_range("2012-01-01", periods=3)) + offset = pd.offsets.DateOffset(days=6) + result = series - offset + expected = pd.Series(pd.to_datetime(["2011-12-26", "2011-12-27", "2011-12-28"])) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 59ae0cd63690c..d22dc72eaaadd 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -632,15 +632,6 @@ def test_date_tz(self): tm.assert_series_equal(s.dt.date, expected) tm.assert_series_equal(s.apply(lambda x: x.date()), expected) - def test_datetime_understood(self): - # Ensures it doesn't fail to create the right series - # reported in issue#16726 - series = pd.Series(pd.date_range("2012-01-01", periods=3)) - offset = pd.offsets.DateOffset(days=6) - result = series - offset - expected = pd.Series(pd.to_datetime(["2011-12-26", "2011-12-27", "2011-12-28"])) - tm.assert_series_equal(result, expected) - def test_dt_timetz_accessor(self, tz_naive_fixture): # GH21358 tz = maybe_get_tz(tz_naive_fixture) diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index e729ff91293a8..a45c0bf8cf154 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -38,53 +38,6 @@ def test_string_index_alias_tz_aware(self, tz): result = ser["1/3/2000"] tm.assert_almost_equal(result, ser[2]) - # TODO: De-duplicate with test below - def test_series_add_tz_mismatch_converts_to_utc_duplicate(self): - rng = date_range("1/1/2011", periods=10, freq="H", tz="US/Eastern") - ser = Series(np.random.randn(len(rng)), index=rng) - - ts_moscow = ser.tz_convert("Europe/Moscow") - - result = ser + ts_moscow - assert result.index.tz is pytz.utc - - result = ts_moscow + ser - assert result.index.tz is pytz.utc - - def test_series_add_tz_mismatch_converts_to_utc(self): - rng = date_range("1/1/2011", periods=100, freq="H", tz="utc") - - perm = np.random.permutation(100)[:90] - ser1 = Series( - np.random.randn(90), index=rng.take(perm).tz_convert("US/Eastern") - ) - - perm = np.random.permutation(100)[:90] - ser2 = Series( - np.random.randn(90), index=rng.take(perm).tz_convert("Europe/Berlin") - ) - - result = ser1 + ser2 - - uts1 = ser1.tz_convert("utc") - uts2 = ser2.tz_convert("utc") - expected = uts1 + uts2 - - assert result.index.tz == pytz.UTC - tm.assert_series_equal(result, expected) - - def test_series_add_aware_naive_raises(self): - rng = date_range("1/1/2011", periods=10, freq="H") - ser = Series(np.random.randn(len(rng)), index=rng) - - ser_utc = ser.tz_localize("utc") - - with pytest.raises(Exception): - ser + ser_utc - - with pytest.raises(Exception): - ser_utc + ser - def test_series_align_aware(self): idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern") ser = Series(np.random.randn(len(idx1)), index=idx1) From cad3f1ceac2d12477a5bafdf9bb7ec869aa11ef9 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 27 Feb 2020 16:11:47 +0000 Subject: [PATCH 174/388] CLN: Follow-up to #32158 (#32290) --- pandas/tests/window/test_window.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/window/test_window.py b/pandas/tests/window/test_window.py index 41b9d9e84f27e..c7c45f0e5e0de 100644 --- a/pandas/tests/window/test_window.py +++ b/pandas/tests/window/test_window.py @@ -29,11 +29,10 @@ def test_constructor(self, which): c(win_type="boxcar", window=2, min_periods=1, center=False) # not valid - msg = 
"|".join(["min_periods must be an integer", "center must be a boolean"]) for w in [2.0, "foo", np.array([2])]: - with pytest.raises(ValueError, match=msg): + with pytest.raises(ValueError, match="min_periods must be an integer"): c(win_type="boxcar", window=2, min_periods=w) - with pytest.raises(ValueError, match=msg): + with pytest.raises(ValueError, match="center must be a boolean"): c(win_type="boxcar", window=2, min_periods=1, center=w) for wt in ["foobar", 1]: From 0233a55ed7baaa5188baf060b06c7d0e4ac9393e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 08:19:13 -0800 Subject: [PATCH 175/388] TST: implement test_first (#32274) --- .../frame/methods/test_first_and_last.py | 61 ++++++++++++++++ pandas/tests/frame/test_timeseries.py | 52 -------------- .../series/methods/test_first_and_last.py | 69 +++++++++++++++++++ pandas/tests/series/test_timeseries.py | 54 --------------- 4 files changed, 130 insertions(+), 106 deletions(-) create mode 100644 pandas/tests/frame/methods/test_first_and_last.py create mode 100644 pandas/tests/series/methods/test_first_and_last.py diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py new file mode 100644 index 0000000000000..73e4128ddebb9 --- /dev/null +++ b/pandas/tests/frame/methods/test_first_and_last.py @@ -0,0 +1,61 @@ +""" +Note: includes tests for `last` +""" +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +class TestFirst: + def test_first_subset(self): + ts = tm.makeTimeDataFrame(freq="12h") + result = ts.first("10d") + assert len(result) == 20 + + ts = tm.makeTimeDataFrame(freq="D") + result = ts.first("10d") + assert len(result) == 10 + + result = ts.first("3M") + expected = ts[:"3/31/2000"] + tm.assert_frame_equal(result, expected) + + result = ts.first("21D") + expected = ts[:21] + tm.assert_frame_equal(result, expected) + + result = ts[:0].first("3M") + tm.assert_frame_equal(result, ts[:0]) + + def test_first_raises(self): + # GH#20725 + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError): # index is not a DatetimeIndex + df.first("1D") + + def test_last_subset(self): + ts = tm.makeTimeDataFrame(freq="12h") + result = ts.last("10d") + assert len(result) == 20 + + ts = tm.makeTimeDataFrame(nper=30, freq="D") + result = ts.last("10d") + assert len(result) == 10 + + result = ts.last("21D") + expected = ts["2000-01-10":] + tm.assert_frame_equal(result, expected) + + result = ts.last("21D") + expected = ts[-21:] + tm.assert_frame_equal(result, expected) + + result = ts[:0].last("3M") + tm.assert_frame_equal(result, ts[:0]) + + def test_last_raises(self): + # GH20725 + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError): # index is not a DatetimeIndex + df.last("1D") diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index b713af92eac27..5956f73bb11f0 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -119,58 +119,6 @@ def test_first_valid_index_all_nan(self, klass): assert obj.first_valid_index() is None assert obj.iloc[:0].first_valid_index() is None - def test_first_subset(self): - ts = tm.makeTimeDataFrame(freq="12h") - result = ts.first("10d") - assert len(result) == 20 - - ts = tm.makeTimeDataFrame(freq="D") - result = ts.first("10d") - assert len(result) == 10 - - result = ts.first("3M") - expected = ts[:"3/31/2000"] - tm.assert_frame_equal(result, expected) - - result = ts.first("21D") - expected = 
ts[:21] - tm.assert_frame_equal(result, expected) - - result = ts[:0].first("3M") - tm.assert_frame_equal(result, ts[:0]) - - def test_first_raises(self): - # GH20725 - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) - with pytest.raises(TypeError): # index is not a DatetimeIndex - df.first("1D") - - def test_last_subset(self): - ts = tm.makeTimeDataFrame(freq="12h") - result = ts.last("10d") - assert len(result) == 20 - - ts = tm.makeTimeDataFrame(nper=30, freq="D") - result = ts.last("10d") - assert len(result) == 10 - - result = ts.last("21D") - expected = ts["2000-01-10":] - tm.assert_frame_equal(result, expected) - - result = ts.last("21D") - expected = ts[-21:] - tm.assert_frame_equal(result, expected) - - result = ts[:0].last("3M") - tm.assert_frame_equal(result, ts[:0]) - - def test_last_raises(self): - # GH20725 - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) - with pytest.raises(TypeError): # index is not a DatetimeIndex - df.last("1D") - def test_operation_on_NaT(self): # Both NaT and Timestamp are in DataFrame. df = pd.DataFrame({"foo": [pd.NaT, pd.NaT, pd.Timestamp("2012-05-01")]}) diff --git a/pandas/tests/series/methods/test_first_and_last.py b/pandas/tests/series/methods/test_first_and_last.py new file mode 100644 index 0000000000000..7629dc8cda30b --- /dev/null +++ b/pandas/tests/series/methods/test_first_and_last.py @@ -0,0 +1,69 @@ +""" +Note: includes tests for `last` +""" + +import numpy as np +import pytest + +from pandas import Series, date_range +import pandas._testing as tm + + +class TestFirst: + def test_first_subset(self): + rng = date_range("1/1/2000", "1/1/2010", freq="12h") + ts = Series(np.random.randn(len(rng)), index=rng) + result = ts.first("10d") + assert len(result) == 20 + + rng = date_range("1/1/2000", "1/1/2010", freq="D") + ts = Series(np.random.randn(len(rng)), index=rng) + result = ts.first("10d") + assert len(result) == 10 + + result = ts.first("3M") + expected = ts[:"3/31/2000"] + tm.assert_series_equal(result, expected) + + result = ts.first("21D") + expected = ts[:21] + tm.assert_series_equal(result, expected) + + result = ts[:0].first("3M") + tm.assert_series_equal(result, ts[:0]) + + def test_first_raises(self): + # GH#20725 + ser = Series("a b c".split()) + msg = "'first' only supports a DatetimeIndex index" + with pytest.raises(TypeError, match=msg): + ser.first("1D") + + def test_last_subset(self): + rng = date_range("1/1/2000", "1/1/2010", freq="12h") + ts = Series(np.random.randn(len(rng)), index=rng) + result = ts.last("10d") + assert len(result) == 20 + + rng = date_range("1/1/2000", "1/1/2010", freq="D") + ts = Series(np.random.randn(len(rng)), index=rng) + result = ts.last("10d") + assert len(result) == 10 + + result = ts.last("21D") + expected = ts["12/12/2009":] + tm.assert_series_equal(result, expected) + + result = ts.last("21D") + expected = ts[-21:] + tm.assert_series_equal(result, expected) + + result = ts[:0].last("3M") + tm.assert_series_equal(result, ts[:0]) + + def test_last_raises(self): + # GH#20725 + ser = Series("a b c".split()) + msg = "'last' only supports a DatetimeIndex index" + with pytest.raises(TypeError, match=msg): + ser.last("1D") diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 544634a2d16e9..592fd6ba558ff 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -167,60 +167,6 @@ def test_promote_datetime_date(self): expected = rng.get_indexer(ts_slice.index) tm.assert_numpy_array_equal(result, expected) - def 
test_first_subset(self): - ts = _simple_ts("1/1/2000", "1/1/2010", freq="12h") - result = ts.first("10d") - assert len(result) == 20 - - ts = _simple_ts("1/1/2000", "1/1/2010") - result = ts.first("10d") - assert len(result) == 10 - - result = ts.first("3M") - expected = ts[:"3/31/2000"] - tm.assert_series_equal(result, expected) - - result = ts.first("21D") - expected = ts[:21] - tm.assert_series_equal(result, expected) - - result = ts[:0].first("3M") - tm.assert_series_equal(result, ts[:0]) - - def test_first_raises(self): - # GH20725 - ser = pd.Series("a b c".split()) - msg = "'first' only supports a DatetimeIndex index" - with pytest.raises(TypeError, match=msg): - ser.first("1D") - - def test_last_subset(self): - ts = _simple_ts("1/1/2000", "1/1/2010", freq="12h") - result = ts.last("10d") - assert len(result) == 20 - - ts = _simple_ts("1/1/2000", "1/1/2010") - result = ts.last("10d") - assert len(result) == 10 - - result = ts.last("21D") - expected = ts["12/12/2009":] - tm.assert_series_equal(result, expected) - - result = ts.last("21D") - expected = ts[-21:] - tm.assert_series_equal(result, expected) - - result = ts[:0].last("3M") - tm.assert_series_equal(result, ts[:0]) - - def test_last_raises(self): - # GH20725 - ser = pd.Series("a b c".split()) - msg = "'last' only supports a DatetimeIndex index" - with pytest.raises(TypeError, match=msg): - ser.last("1D") - def test_format_pre_1900_dates(self): rng = date_range("1/1/1850", "1/1/1950", freq="A-DEC") rng.format() From 674dd6932e82775f3a51ef8b97ba44df4015c909 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Thu, 27 Feb 2020 16:56:47 +0000 Subject: [PATCH 176/388] CI: Temporary fix to the docs build while we fix the ssh problems (#32279) --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7493be34d10c7..a337ccbc98650 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -188,5 +188,5 @@ jobs: run: | cd pandas_web git remote add origin git@github.com:pandas-dev/pandas-dev.github.io.git - git push -f origin master + git push -f origin master || true if: github.event_name == 'push' From 217a4283d05c179c47609aed7c79377ca2ef2ae0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 09:01:21 -0800 Subject: [PATCH 177/388] TST: test_to_time (#32285) --- pandas/tests/indexes/datetimes/test_tools.py | 48 +--------------- pandas/tests/tools/test_to_time.py | 58 ++++++++++++++++++++ 2 files changed, 59 insertions(+), 47 deletions(-) create mode 100644 pandas/tests/tools/test_to_time.py diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index ecfecfb414326..a91c837c9d9a2 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -2,7 +2,7 @@ import calendar from collections import deque -from datetime import datetime, time, timedelta +from datetime import datetime, timedelta import locale from dateutil.parser import parse @@ -2112,52 +2112,6 @@ def test_parsers_timestring(self, cache): assert result4 == exp_now assert result5 == exp_now - @td.skip_if_has_locale - def test_parsers_time(self): - # GH11818 - strings = [ - "14:15", - "1415", - "2:15pm", - "0215pm", - "14:15:00", - "141500", - "2:15:00pm", - "021500pm", - time(14, 15), - ] - expected = time(14, 15) - - for time_string in strings: - assert tools.to_time(time_string) == expected - - new_string = "14.15" - msg = r"Cannot convert arg 
\['14\.15'\] to a time" - with pytest.raises(ValueError, match=msg): - tools.to_time(new_string) - assert tools.to_time(new_string, format="%H.%M") == expected - - arg = ["14:15", "20:20"] - expected_arr = [time(14, 15), time(20, 20)] - assert tools.to_time(arg) == expected_arr - assert tools.to_time(arg, format="%H:%M") == expected_arr - assert tools.to_time(arg, infer_time_format=True) == expected_arr - assert tools.to_time(arg, format="%I:%M%p", errors="coerce") == [None, None] - - res = tools.to_time(arg, format="%I:%M%p", errors="ignore") - tm.assert_numpy_array_equal(res, np.array(arg, dtype=np.object_)) - - with pytest.raises(ValueError): - tools.to_time(arg, format="%I:%M%p", errors="raise") - - tm.assert_series_equal( - tools.to_time(Series(arg, name="test")), Series(expected_arr, name="test") - ) - - res = tools.to_time(np.array(arg)) - assert isinstance(res, list) - assert res == expected_arr - @pytest.mark.parametrize("cache", [True, False]) @pytest.mark.parametrize( "dt_string, tz, dt_string_repr", diff --git a/pandas/tests/tools/test_to_time.py b/pandas/tests/tools/test_to_time.py new file mode 100644 index 0000000000000..17ab492aca725 --- /dev/null +++ b/pandas/tests/tools/test_to_time.py @@ -0,0 +1,58 @@ +from datetime import time + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import Series +import pandas._testing as tm +from pandas.core.tools.datetimes import to_time + + +class TestToTime: + @td.skip_if_has_locale + def test_parsers_time(self): + # GH#11818 + strings = [ + "14:15", + "1415", + "2:15pm", + "0215pm", + "14:15:00", + "141500", + "2:15:00pm", + "021500pm", + time(14, 15), + ] + expected = time(14, 15) + + for time_string in strings: + assert to_time(time_string) == expected + + new_string = "14.15" + msg = r"Cannot convert arg \['14\.15'\] to a time" + with pytest.raises(ValueError, match=msg): + to_time(new_string) + assert to_time(new_string, format="%H.%M") == expected + + arg = ["14:15", "20:20"] + expected_arr = [time(14, 15), time(20, 20)] + assert to_time(arg) == expected_arr + assert to_time(arg, format="%H:%M") == expected_arr + assert to_time(arg, infer_time_format=True) == expected_arr + assert to_time(arg, format="%I:%M%p", errors="coerce") == [None, None] + + res = to_time(arg, format="%I:%M%p", errors="ignore") + tm.assert_numpy_array_equal(res, np.array(arg, dtype=np.object_)) + + with pytest.raises(ValueError): + to_time(arg, format="%I:%M%p", errors="raise") + + tm.assert_series_equal( + to_time(Series(arg, name="test")), Series(expected_arr, name="test") + ) + + res = to_time(np.array(arg)) + assert isinstance(res, list) + assert res == expected_arr From 1d18e9523ae2a7834ee7d3d0f20fe6d0bde6e96f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 09:07:10 -0800 Subject: [PATCH 178/388] implement test_to_period (#32270) --- pandas/tests/series/methods/test_to_period.py | 47 +++++++++++++++++++ pandas/tests/series/test_timeseries.py | 35 -------------- 2 files changed, 47 insertions(+), 35 deletions(-) create mode 100644 pandas/tests/series/methods/test_to_period.py diff --git a/pandas/tests/series/methods/test_to_period.py b/pandas/tests/series/methods/test_to_period.py new file mode 100644 index 0000000000000..28c4aad3edf32 --- /dev/null +++ b/pandas/tests/series/methods/test_to_period.py @@ -0,0 +1,47 @@ +import numpy as np + +from pandas import ( + DataFrame, + DatetimeIndex, + PeriodIndex, + Series, + date_range, + period_range, +) +import pandas._testing as tm + + +class 
TestToPeriod: + def test_to_period(self): + rng = date_range("1/1/2000", "1/1/2001", freq="D") + ts = Series(np.random.randn(len(rng)), index=rng) + + pts = ts.to_period() + exp = ts.copy() + exp.index = period_range("1/1/2000", "1/1/2001") + tm.assert_series_equal(pts, exp) + + pts = ts.to_period("M") + exp.index = exp.index.asfreq("M") + tm.assert_index_equal(pts.index, exp.index.asfreq("M")) + tm.assert_series_equal(pts, exp) + + # GH#7606 without freq + idx = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"]) + exp_idx = PeriodIndex( + ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], freq="D" + ) + + s = Series(np.random.randn(4), index=idx) + expected = s.copy() + expected.index = exp_idx + tm.assert_series_equal(s.to_period(), expected) + + df = DataFrame(np.random.randn(4, 4), index=idx, columns=idx) + expected = df.copy() + expected.index = exp_idx + tm.assert_frame_equal(df.to_period(), expected) + + expected = df.copy() + expected.columns = exp_idx + tm.assert_frame_equal(df.to_period(axis=1), expected) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 592fd6ba558ff..efaf5f806e935 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -173,41 +173,6 @@ def test_format_pre_1900_dates(self): ts = Series(1, index=rng) repr(ts) - def test_to_period(self): - from pandas.core.indexes.period import period_range - - ts = _simple_ts("1/1/2000", "1/1/2001") - - pts = ts.to_period() - exp = ts.copy() - exp.index = period_range("1/1/2000", "1/1/2001") - tm.assert_series_equal(pts, exp) - - pts = ts.to_period("M") - exp.index = exp.index.asfreq("M") - tm.assert_index_equal(pts.index, exp.index.asfreq("M")) - tm.assert_series_equal(pts, exp) - - # GH 7606 without freq - idx = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"]) - exp_idx = pd.PeriodIndex( - ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], freq="D" - ) - - s = Series(np.random.randn(4), index=idx) - expected = s.copy() - expected.index = exp_idx - tm.assert_series_equal(s.to_period(), expected) - - df = DataFrame(np.random.randn(4, 4), index=idx, columns=idx) - expected = df.copy() - expected.index = exp_idx - tm.assert_frame_equal(df.to_period(), expected) - - expected = df.copy() - expected.columns = exp_idx - tm.assert_frame_equal(df.to_period(axis=1), expected) - def test_groupby_count_dateparseerror(self): dr = date_range(start="1/1/2012", freq="5min", periods=10) From 86f04686fd5b8a1f8720c53b001fa09e14a31e41 Mon Sep 17 00:00:00 2001 From: Ken Mankoff Date: Thu, 27 Feb 2020 09:53:34 -0800 Subject: [PATCH 179/388] fixed minor docstring typo (#32298) --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b6b6a4fe74ed5..6f5aef4884ccd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -832,7 +832,6 @@ def style(self) -> "Styler": Returns a Styler object. Contains methods for building a styled HTML representation of the DataFrame. - a styled HTML representation fo the DataFrame. 
See Also -------- From d5d68348748dc345275c65fa6b6dbe61deaa984d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 10:45:18 -0800 Subject: [PATCH 180/388] REF/TST: misplaced tests in test_timeseries, test_timezones (#32300) --- pandas/tests/frame/test_constructors.py | 19 ++++ pandas/tests/frame/test_timezones.py | 8 -- .../tests/indexes/datetimes/test_datetime.py | 20 ++++- pandas/tests/indexes/datetimes/test_setops.py | 35 ++++++++ .../indexes/multi/test_get_level_values.py | 13 +++ pandas/tests/series/test_constructors.py | 7 ++ pandas/tests/series/test_timeseries.py | 88 +------------------ pandas/tests/series/test_timezones.py | 7 -- 8 files changed, 94 insertions(+), 103 deletions(-) create mode 100644 pandas/tests/indexes/multi/test_get_level_values.py diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 72fc0010cbbce..a42cfc6a214ad 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -7,6 +7,7 @@ import numpy.ma as ma import numpy.ma.mrecords as mrecords import pytest +import pytz from pandas.compat import is_platform_little_endian from pandas.compat.numpy import _is_numpy_dev @@ -2389,6 +2390,12 @@ def test_from_records_series_list_dict(self): result = DataFrame.from_records(data) tm.assert_frame_equal(result, expected) + def test_frame_from_records_utc(self): + rec = {"datum": 1.5, "begin_time": datetime(2006, 4, 27, tzinfo=pytz.utc)} + + # it works + DataFrame.from_records([rec], index="begin_time") + def test_to_frame_with_falsey_names(self): # GH 16114 result = Series(name=0, dtype=object).to_frame().dtypes @@ -2460,6 +2467,18 @@ def test_construct_with_two_categoricalindex_series(self): ) tm.assert_frame_equal(result, expected) + def test_from_M8_structured(self): + dates = [(datetime(2012, 9, 9, 0, 0), datetime(2012, 9, 8, 15, 10))] + arr = np.array(dates, dtype=[("Date", "M8[us]"), ("Forecasting", "M8[us]")]) + df = DataFrame(arr) + + assert df["Date"][0] == dates[0][0] + assert df["Forecasting"][0] == dates[0][1] + + s = Series(arr["Date"]) + assert isinstance(s[0], Timestamp) + assert s[0] == dates[0][0] + class TestDataFrameConstructorWithDatetimeTZ: def test_from_dict(self): diff --git a/pandas/tests/frame/test_timezones.py b/pandas/tests/frame/test_timezones.py index 00a253d4e5ad0..dfd4fb1855383 100644 --- a/pandas/tests/frame/test_timezones.py +++ b/pandas/tests/frame/test_timezones.py @@ -1,8 +1,6 @@ """ Tests for DataFrame timezone-related methods """ -from datetime import datetime - import numpy as np import pytest import pytz @@ -53,12 +51,6 @@ def test_frame_values_with_tz(self): result = df.values tm.assert_numpy_array_equal(result, expected) - def test_frame_from_records_utc(self): - rec = {"datum": 1.5, "begin_time": datetime(2006, 4, 27, tzinfo=pytz.utc)} - - # it works - DataFrame.from_records([rec], index="begin_time") - def test_frame_join_tzaware(self): test1 = DataFrame( np.zeros((6, 3)), diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 1a72ef2bdf1aa..6217f225d496e 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -5,7 +5,7 @@ import pytest import pandas as pd -from pandas import DataFrame, DatetimeIndex, Index, Timestamp, date_range, offsets +from pandas import DataFrame, DatetimeIndex, Index, NaT, Timestamp, date_range, offsets import pandas._testing as tm randn = np.random.randn @@ -20,6 +20,24 @@ def 
test_roundtrip_pickle_with_tz(self): unpickled = tm.round_trip_pickle(index) tm.assert_index_equal(index, unpickled) + def test_pickle(self): + + # GH#4606 + p = tm.round_trip_pickle(NaT) + assert p is NaT + + idx = pd.to_datetime(["2013-01-01", NaT, "2014-01-06"]) + idx_p = tm.round_trip_pickle(idx) + assert idx_p[0] == idx[0] + assert idx_p[1] is NaT + assert idx_p[2] == idx[2] + + # GH#11002 + # don't infer freq + idx = date_range("1750-1-1", "2050-1-1", freq="7D") + idx_p = tm.round_trip_pickle(idx) + tm.assert_index_equal(idx, idx_p) + def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): # GH7774 index = date_range("20130101", periods=3, tz="US/Eastern") diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index d58ecbad4c1b3..ba069f5245de4 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -347,6 +347,41 @@ def test_datetimeindex_diff(self, sort): dti2 = date_range(freq="Q-JAN", start=datetime(1997, 12, 31), periods=98) assert len(dti1.difference(dti2, sort)) == 2 + @pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Eastern"]) + def test_setops_preserve_freq(self, tz): + rng = date_range("1/1/2000", "1/1/2002", name="idx", tz=tz) + + result = rng[:50].union(rng[50:100]) + assert result.name == rng.name + assert result.freq == rng.freq + assert result.tz == rng.tz + + result = rng[:50].union(rng[30:100]) + assert result.name == rng.name + assert result.freq == rng.freq + assert result.tz == rng.tz + + result = rng[:50].union(rng[60:100]) + assert result.name == rng.name + assert result.freq is None + assert result.tz == rng.tz + + result = rng[:50].intersection(rng[25:75]) + assert result.name == rng.name + assert result.freqstr == "D" + assert result.tz == rng.tz + + nofreq = DatetimeIndex(list(rng[25:75]), name="other") + result = rng[:50].union(nofreq) + assert result.name is None + assert result.freq == rng.freq + assert result.tz == rng.tz + + result = rng[:50].intersection(nofreq) + assert result.name is None + assert result.freq == rng.freq + assert result.tz == rng.tz + class TestBusinessDatetimeIndex: def setup_method(self, method): diff --git a/pandas/tests/indexes/multi/test_get_level_values.py b/pandas/tests/indexes/multi/test_get_level_values.py new file mode 100644 index 0000000000000..6f0b23c1ef4a0 --- /dev/null +++ b/pandas/tests/indexes/multi/test_get_level_values.py @@ -0,0 +1,13 @@ +from pandas import MultiIndex, Timestamp, date_range + + +class TestGetLevelValues: + def test_get_level_values_box_datetime64(self): + + dates = date_range("1/1/2000", periods=4) + levels = [dates, [0, 1]] + codes = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]] + + index = MultiIndex(levels=levels, codes=codes) + + assert isinstance(index.get_level_values(0)[0], Timestamp) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index b0d06793dbe13..1a794f8656abe 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1421,3 +1421,10 @@ def test_constructor_tz_mixed_data(self): result = Series(dt_list) expected = Series(dt_list, dtype=object) tm.assert_series_equal(result, expected) + + def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture): + # GH#25843 + tz = tz_aware_fixture + result = Series([Timestamp("2019", tz=tz)], dtype="datetime64[ns]") + expected = Series([Timestamp("2019")]) + tm.assert_series_equal(result, expected) diff --git 
a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index efaf5f806e935..c4b2e2edd845a 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -1,21 +1,11 @@ -from datetime import datetime from io import StringIO import numpy as np -import pytest from pandas._libs.tslib import iNaT import pandas as pd -from pandas import ( - DataFrame, - DatetimeIndex, - NaT, - Series, - Timestamp, - date_range, - timedelta_range, -) +from pandas import DataFrame, DatetimeIndex, Series, date_range, timedelta_range import pandas._testing as tm @@ -225,82 +215,6 @@ def test_asfreq_resample_set_correct_freq(self): # does .resample() set .freq correctly? assert df.resample("D").asfreq().index.freq == "D" - def test_pickle(self): - - # GH4606 - p = tm.round_trip_pickle(NaT) - assert p is NaT - - idx = pd.to_datetime(["2013-01-01", NaT, "2014-01-06"]) - idx_p = tm.round_trip_pickle(idx) - assert idx_p[0] == idx[0] - assert idx_p[1] is NaT - assert idx_p[2] == idx[2] - - # GH11002 - # don't infer freq - idx = date_range("1750-1-1", "2050-1-1", freq="7D") - idx_p = tm.round_trip_pickle(idx) - tm.assert_index_equal(idx, idx_p) - - @pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Eastern"]) - def test_setops_preserve_freq(self, tz): - rng = date_range("1/1/2000", "1/1/2002", name="idx", tz=tz) - - result = rng[:50].union(rng[50:100]) - assert result.name == rng.name - assert result.freq == rng.freq - assert result.tz == rng.tz - - result = rng[:50].union(rng[30:100]) - assert result.name == rng.name - assert result.freq == rng.freq - assert result.tz == rng.tz - - result = rng[:50].union(rng[60:100]) - assert result.name == rng.name - assert result.freq is None - assert result.tz == rng.tz - - result = rng[:50].intersection(rng[25:75]) - assert result.name == rng.name - assert result.freqstr == "D" - assert result.tz == rng.tz - - nofreq = DatetimeIndex(list(rng[25:75]), name="other") - result = rng[:50].union(nofreq) - assert result.name is None - assert result.freq == rng.freq - assert result.tz == rng.tz - - result = rng[:50].intersection(nofreq) - assert result.name is None - assert result.freq == rng.freq - assert result.tz == rng.tz - - def test_from_M8_structured(self): - dates = [(datetime(2012, 9, 9, 0, 0), datetime(2012, 9, 8, 15, 10))] - arr = np.array(dates, dtype=[("Date", "M8[us]"), ("Forecasting", "M8[us]")]) - df = DataFrame(arr) - - assert df["Date"][0] == dates[0][0] - assert df["Forecasting"][0] == dates[0][1] - - s = Series(arr["Date"]) - assert isinstance(s[0], Timestamp) - assert s[0] == dates[0][0] - - def test_get_level_values_box(self): - from pandas import MultiIndex - - dates = date_range("1/1/2000", periods=4) - levels = [dates, [0, 1]] - codes = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]] - - index = MultiIndex(levels=levels, codes=codes) - - assert isinstance(index.get_level_values(0)[0], Timestamp) - def test_view_tz(self): # GH#24024 ser = pd.Series(pd.date_range("2000", periods=4, tz="US/Central")) diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index a45c0bf8cf154..ae4fd12abdb88 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -76,10 +76,3 @@ def test_tz_localize_convert_copy_inplace_mutate(self, copy, method, tz): np.arange(0, 5), index=date_range("20131027", periods=5, freq="1H", tz=tz) ) tm.assert_series_equal(result, expected) - - def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture): - 
# GH 25843 - tz = tz_aware_fixture - result = Series([Timestamp("2019", tz=tz)], dtype="datetime64[ns]") - expected = Series([Timestamp("2019")]) - tm.assert_series_equal(result, expected) From b4cbc196bae1efbd9d989204afc87f4b2fc456b3 Mon Sep 17 00:00:00 2001 From: Samira-g-js <58177297+Samira-g-js@users.noreply.github.com> Date: Thu, 27 Feb 2020 19:31:57 +0000 Subject: [PATCH 181/388] Added in an error message (#32105) --- pandas/tests/arrays/test_array.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index b1b5a9482e34f..f42b16cf18f20 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -291,7 +291,7 @@ class DecimalArray2(DecimalArray): @classmethod def _from_sequence(cls, scalars, dtype=None, copy=False): if isinstance(scalars, (pd.Series, pd.Index)): - raise TypeError + raise TypeError("scalars should not be of type pd.Series or pd.Index") return super()._from_sequence(scalars, dtype=dtype, copy=copy) @@ -301,7 +301,9 @@ def test_array_unboxes(index_or_series): data = box([decimal.Decimal("1"), decimal.Decimal("2")]) # make sure it works - with pytest.raises(TypeError): + with pytest.raises( + TypeError, match="scalars should not be of type pd.Series or pd.Index" + ): DecimalArray2._from_sequence(data) result = pd.array(data, dtype="decimal2") From 4800ab4d39bbb64bcbbef24db31517aa57d7d6b3 Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Thu, 27 Feb 2020 21:42:38 +0000 Subject: [PATCH 182/388] DOC: Add example for multiindex series and dataframe merge (#32068) --- doc/source/user_guide/merging.rst | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst index 8fdcd8d281a41..8302b5c5dea60 100644 --- a/doc/source/user_guide/merging.rst +++ b/doc/source/user_guide/merging.rst @@ -724,6 +724,27 @@ either the left or right tables, the values in the joined table will be labels=['left', 'right'], vertical=False); plt.close('all'); +You can merge a multi-indexed Series and a DataFrame if the names of +the MultiIndex correspond to the columns from the DataFrame. Transform +the Series to a DataFrame using :meth:`Series.reset_index` before merging, +as shown in the following example. + +.. ipython:: python + + df = pd.DataFrame({"Let": ["A", "B", "C"], "Num": [1, 2, 3]}) + df + + ser = pd.Series( + ["a", "b", "c", "d", "e", "f"], + index=pd.MultiIndex.from_arrays( + [["A", "B", "C"] * 2, [1, 2, 3, 4, 5, 6]], names=["Let", "Num"] + ), + ) + ser + + result = pd.merge(df, ser.reset_index(), on=['Let', 'Num']) + + Here is another example with duplicate join keys in DataFrames: ..
ipython:: python From ea1d8fadb95fbc7cafe036274006228400817fd4 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 27 Feb 2020 23:07:25 +0000 Subject: [PATCH 183/388] BUG: fix in categorical merges (#32079) --- doc/source/whatsnew/v1.1.0.rst | 3 ++- pandas/_libs/join.pyx | 2 ++ pandas/core/indexes/category.py | 7 +++++++ pandas/tests/reshape/merge/test_merge.py | 22 ++++++++++++++++++++++ 4 files changed, 33 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 2b64b85863def..830b9042b3071 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -126,9 +126,10 @@ Bug fixes Categorical ^^^^^^^^^^^ + +- Bug where :func:`merge` was unable to join on non-unique categorical indices (:issue:`28189`) - Bug when passing categorical data to :class:`Index` constructor along with ``dtype=object`` incorrectly returning a :class:`CategoricalIndex` instead of object-dtype :class:`Index` (:issue:`32167`) - -- Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index dfa7aa708d681..f696591cf3bd1 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -254,6 +254,8 @@ ctypedef fused join_t: float64_t float32_t object + int8_t + int16_t int32_t int64_t uint64_t diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 5f0d6ea2d6278..4475cb4141f65 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -29,6 +29,7 @@ from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name from pandas.core.indexes.extension import ExtensionIndex, inherit_names import pandas.core.missing as missing +from pandas.core.ops import get_op_result_name _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update(dict(target_klass="CategoricalIndex")) @@ -763,6 +764,12 @@ def _delegate_method(self, name: str, *args, **kwargs): return res return CategoricalIndex(res, name=self.name) + def _wrap_joined_index( + self, joined: np.ndarray, other: "CategoricalIndex" + ) -> "CategoricalIndex": + name = get_op_result_name(self, other) + return self._create_from_codes(joined, name=name) + CategoricalIndex._add_numeric_methods_add_sub_disabled() CategoricalIndex._add_numeric_methods_disabled() diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 4f2cd878df613..d80e2e7afceef 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2163,3 +2163,25 @@ def test_merge_datetime_upcast_dtype(): } ) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("n_categories", [5, 128]) +def test_categorical_non_unique_monotonic(n_categories): + # GH 28189 + # With n_categories as 5, we test the int8 case is hit in libjoin, + # with n_categories as 128 we test the int16 case. 
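+    # the leading 0 repeats the first code, so the index is non-unique but
+    # still monotonic -- the shape reported in GH 28189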
+ left_index = CategoricalIndex([0] + list(range(n_categories))) + df1 = DataFrame(range(n_categories + 1), columns=["value"], index=left_index) + df2 = DataFrame( + [[6]], + columns=["value"], + index=CategoricalIndex([0], categories=np.arange(n_categories)), + ) + + result = merge(df1, df2, how="left", left_index=True, right_index=True) + expected = DataFrame( + [[i, 6.0] if i < 2 else [i, np.nan] for i in range(n_categories + 1)], + columns=["value_x", "value_y"], + index=left_index, + ) + tm.assert_frame_equal(expected, result) From 1e7dc49fd45b41c260f2256f8bb28281c60948ae Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 27 Feb 2020 23:08:15 +0000 Subject: [PATCH 184/388] WEB: Add greeting note to CoC (#32024) --- web/pandas/community/coc.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/web/pandas/community/coc.md b/web/pandas/community/coc.md index bf62f4e00f847..d2af9c3fdd25b 100644 --- a/web/pandas/community/coc.md +++ b/web/pandas/community/coc.md @@ -20,6 +20,9 @@ Examples of unacceptable behavior by participants include: addresses, without explicit permission * Other unethical or unprofessional conduct +Furthermore, we encourage inclusive behavior - for example, +please don’t say “hey guys!” but “hey everyone!”. + Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or From 5fa9860f51bde09fb7d1c9873d56e380d4c4facf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 15:08:51 -0800 Subject: [PATCH 185/388] API/BUG: raise only KeyError on failed getitem/loc lookups (#31867) --- doc/source/whatsnew/v1.1.0.rst | 71 ++++++++++++++++++++++++- pandas/core/indexes/base.py | 6 +-- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/datetimelike.py | 4 +- pandas/core/indexing.py | 2 +- pandas/tests/frame/test_constructors.py | 6 +-- pandas/tests/indexing/test_floats.py | 29 ++++------ pandas/tests/indexing/test_loc.py | 2 +- 8 files changed, 89 insertions(+), 33 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 830b9042b3071..0f18a1fd81815 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -90,7 +90,76 @@ Backwards incompatible API changes now raise a ``TypeError`` if a not-accepted keyword argument is passed into it. Previously a ``UnsupportedFunctionCall`` was raised (``AssertionError`` if ``min_count`` passed into :meth:`~DataFrameGroupby.median``) (:issue:`31485`) - :meth:`DataFrame.at` and :meth:`Series.at` will raise a ``TypeError`` instead of a ``ValueError`` if an incompatible key is passed, and ``KeyError`` if a missing key is passed, matching the behavior of ``.loc[]`` (:issue:`31722`) -- + +.. _whatsnew_110.api_breaking.indexing_raises_key_errors: + +Failed Label-Based Lookups Always Raise KeyError +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Label lookups ``series[key]``, ``series.loc[key]`` and ``frame.loc[key]`` +used to raise either ``KeyError`` or ``TypeError`` depending on the type of +key and type of :class:`Index`. These now consistently raise ``KeyError`` (:issue:`31867`) + +.. ipython:: python + + ser1 = pd.Series(range(3), index=[0, 1, 2]) + ser2 = pd.Series(range(3), index=pd.date_range("2020-02-01", periods=3)) + +*Previous behavior*: + +.. code-block:: ipython + + In [3]: ser1[1.5] + ...
+ TypeError: cannot do label indexing on Int64Index with these indexers [1.5] of type float + + In [4]: ser1["foo"] + ... + KeyError: 'foo' + + In [5]: ser1.loc[1.5] + ... + TypeError: cannot do label indexing on Int64Index with these indexers [1.5] of type float + + In [6]: ser1.loc["foo"] + ... + KeyError: 'foo' + + In [7]: ser2.loc[1] + ... + TypeError: cannot do label indexing on DatetimeIndex with these indexers [1] of type int + + In [8]: ser2.loc[pd.Timestamp(0)] + ... + KeyError: Timestamp('1970-01-01 00:00:00') + +*New behavior*: + +.. code-block:: ipython + + In [3]: ser1[1.5] + ... + KeyError: 1.5 + + In [4]: ser1["foo"] + ... + KeyError: 'foo' + + In [5]: ser1.loc[1.5] + ... + KeyError: 1.5 + + In [6]: ser1.loc["foo"] + ... + KeyError: 'foo' + + In [7]: ser2.loc[1] + ... + KeyError: 1 + + In [8]: ser2.loc[pd.Timestamp(0)] + ... + KeyError: Timestamp('1970-01-01 00:00:00') .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b5e323fbd0fa4..5b674458e95ee 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3096,7 +3096,7 @@ def _convert_scalar_indexer(self, key, kind: str_t): if kind == "getitem" and is_float(key): if not self.is_floating(): - self._invalid_indexer("label", key) + raise KeyError(key) elif kind == "loc" and is_float(key): @@ -3110,11 +3110,11 @@ def _convert_scalar_indexer(self, key, kind: str_t): "string", "mixed", ]: - self._invalid_indexer("label", key) + raise KeyError(key) elif kind == "loc" and is_integer(key): if not (is_integer_dtype(self.dtype) or is_object_dtype(self.dtype)): - self._invalid_indexer("label", key) + raise KeyError(key) return key diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 4475cb4141f65..8c2d7f4aa6c0e 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -581,7 +581,7 @@ def _convert_scalar_indexer(self, key, kind: str): try: return self.categories._convert_scalar_indexer(key, kind="loc") except TypeError: - self._invalid_indexer("label", key) + raise KeyError(key) return super()._convert_scalar_indexer(key, kind=kind) @Appender(Index._convert_list_indexer.__doc__) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 72a2aba2d8a88..d3038ae88652b 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -397,9 +397,9 @@ def _convert_scalar_indexer(self, key, kind: str): is_int = is_integer(key) is_flt = is_float(key) if kind == "loc" and (is_int or is_flt): - self._invalid_indexer("label", key) + raise KeyError(key) elif kind == "getitem" and is_flt: - self._invalid_indexer("label", key) + raise KeyError(key) return super()._convert_scalar_indexer(key, kind=kind) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 69283bc58799e..a0e96ac169ff7 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1143,7 +1143,7 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): # try to find out correct indexer, if not type correct raise try: key = labels._convert_scalar_indexer(key, kind="loc") - except TypeError: + except KeyError: # but we will allow setting if not is_setter: raise diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index a42cfc6a214ad..071d2409f1be2 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1864,11 +1864,7 
@@ def check(df): # No NaN found -> error if len(indexer) == 0: - msg = ( - "cannot do label indexing on RangeIndex " - r"with these indexers \[nan\] of type float" - ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^nan$"): df.loc[:, np.nan] # single nan should result in Series elif len(indexer) == 1: diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index a84e88cefbced..c966962a7c87d 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -97,11 +97,9 @@ def test_scalar_non_numeric(self, index_func, klass): # getting for idxr, getitem in [(lambda x: x.iloc, False), (lambda x: x, True)]: - # gettitem on a DataFrame is a KeyError as it is indexing - # via labels on the columns - if getitem and isinstance(s, DataFrame): + if getitem: error = KeyError - msg = r"^3(\.0)?$" + msg = r"^3\.0?$" else: error = TypeError msg = ( @@ -120,6 +118,9 @@ def test_scalar_non_numeric(self, index_func, klass): "string", "unicode", "mixed", + "period", + "timedelta64", + "datetime64", }: error = KeyError msg = r"^3\.0$" @@ -181,12 +182,7 @@ def test_scalar_non_numeric_series_fallback(self, index_func): i = index_func(5) s = Series(np.arange(len(i)), index=i) s[3] - msg = ( - r"cannot do (label|positional) indexing " - fr"on {type(i).__name__} with these indexers \[3\.0\] of " - "type float" - ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^3.0$"): s[3.0] def test_scalar_with_mixed(self): @@ -197,12 +193,12 @@ def test_scalar_with_mixed(self): # lookup in a pure stringstr # with an invalid indexer msg = ( - "cannot do label indexing " - fr"on {Index.__name__} with these indexers \[1\.0\] of " + r"cannot do label indexing " + r"on Index with these indexers \[1\.0\] of " r"type float|" "Cannot index by location index with a non-integer key" ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^1.0$"): s2[1.0] with pytest.raises(TypeError, match=msg): s2.iloc[1.0] @@ -216,12 +212,7 @@ def test_scalar_with_mixed(self): # mixed index so we have label # indexing - msg = ( - "cannot do label indexing " - fr"on {Index.__name__} with these indexers \[1\.0\] of " - "type float" - ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^1.0$"): s3[1.0] result = s3[1] diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 276d11a67ad18..4d042af8d59b4 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -35,7 +35,7 @@ def test_loc_getitem_label_out_of_range(self): "loc", 20, typs=["ints", "uints", "mixed"], fails=KeyError, ) self.check_result("loc", 20, typs=["labels"], fails=KeyError) - self.check_result("loc", 20, typs=["ts"], axes=0, fails=TypeError) + self.check_result("loc", 20, typs=["ts"], axes=0, fails=KeyError) self.check_result("loc", 20, typs=["floats"], axes=0, fails=KeyError) def test_loc_getitem_label_list(self): From edcf1c8f87beeb91544e0cd4d26bb32cf4b742a7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 27 Feb 2020 16:09:16 -0800 Subject: [PATCH 186/388] DOC: Reorder 1.0 releases in whatsnew/index.rst (#32309) --- doc/source/whatsnew/index.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst index 68aabfe76d8de..cbfeb0352c283 100644 --- a/doc/source/whatsnew/index.rst +++ b/doc/source/whatsnew/index.rst @@ -7,7 +7,7 @@ Release Notes 
************* This is the list of changes to pandas between each release. For full details, -see the commit logs at https://github.com/pandas-dev/pandas. For install and +see the `commit logs <https://github.com/pandas-dev/pandas/commits/>`_. For install and upgrade instructions, see :ref:`install`. Version 1.1 @@ -24,9 +24,9 @@ Version 1.0 .. toctree:: :maxdepth: 2 - v1.0.0 - v1.0.1 v1.0.2 + v1.0.1 + v1.0.0 Version 0.25 ------------ From fc60870408d8bf1e7a81d231efdc4f8f83b2d74b Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Fri, 28 Feb 2020 12:15:58 +0200 Subject: [PATCH 187/388] STY: spaces in wrong place (#32323) --- pandas/tests/base/test_ops.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index f85d823cb2fac..06ba6cc34ad92 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -213,9 +213,9 @@ def test_value_counts_unique_nunique(self, index_or_series_obj): if orig.duplicated().any(): pytest.xfail( - "The test implementation isn't flexible enough to deal" - " with duplicated values. This isn't a bug in the" - " application code, but in the test code." + "The test implementation isn't flexible enough to deal " + "with duplicated values. This isn't a bug in the " + "application code, but in the test code." ) @@ -279,9 +279,9 @@ def test_value_counts_unique_nunique_null(self, null_obj, index_or_series_obj): pytest.skip("MultiIndex doesn't support isna") elif orig.duplicated().any(): pytest.xfail( - "The test implementation isn't flexible enough to deal" - " with duplicated values. This isn't a bug in the" - " application code, but in the test code." + "The test implementation isn't flexible enough to deal " + "with duplicated values. This isn't a bug in the " + "application code, but in the test code."
) # special assign to the numpy array From 24827e5d93f55368803a8e6a599e5b0cf858ac24 Mon Sep 17 00:00:00 2001 From: Ram Rachum Date: Fri, 28 Feb 2020 12:22:52 +0200 Subject: [PATCH 188/388] Fix exception causes all over the code (#32322) --- doc/make.py | 4 +-- pandas/__init__.py | 12 ++++---- pandas/_config/config.py | 4 +-- pandas/core/algorithms.py | 4 +-- pandas/core/arrays/_ranges.py | 4 +-- pandas/core/arrays/categorical.py | 8 +++--- pandas/core/arrays/datetimes.py | 4 +-- pandas/core/arrays/integer.py | 8 +++--- pandas/core/arrays/interval.py | 28 +++++++++---------- pandas/core/arrays/sparse/dtype.py | 4 +-- pandas/core/arrays/sparse/scipy_sparse.py | 6 ++-- pandas/core/arrays/timedeltas.py | 4 +-- pandas/core/base.py | 10 ++++--- pandas/core/computation/eval.py | 8 +++--- pandas/core/computation/ops.py | 8 +++--- pandas/core/computation/parsing.py | 4 +-- pandas/core/computation/pytables.py | 14 ++++++---- pandas/core/computation/scope.py | 4 +-- pandas/core/dtypes/cast.py | 4 +-- pandas/core/dtypes/common.py | 10 +++---- pandas/core/dtypes/dtypes.py | 4 +-- pandas/core/frame.py | 12 ++++---- pandas/core/generic.py | 18 ++++++------ pandas/core/groupby/generic.py | 8 +++--- pandas/core/groupby/groupby.py | 4 +-- pandas/core/indexers.py | 4 +-- pandas/core/indexes/base.py | 10 ++++--- pandas/core/indexes/datetimelike.py | 4 +-- pandas/core/indexes/datetimes.py | 12 ++++---- pandas/core/indexes/interval.py | 12 ++++---- pandas/core/indexes/multi.py | 12 ++++---- pandas/core/indexes/period.py | 20 ++++++------- pandas/core/indexes/range.py | 8 +++--- pandas/core/indexes/timedeltas.py | 4 +-- pandas/core/indexing.py | 10 +++---- pandas/core/missing.py | 4 +-- pandas/core/nanops.py | 14 +++++----- pandas/core/ops/array_ops.py | 4 +-- pandas/core/reshape/concat.py | 4 +-- pandas/core/reshape/pivot.py | 8 +++--- pandas/core/series.py | 4 +-- pandas/core/strings.py | 4 +-- pandas/core/tools/datetimes.py | 34 +++++++++++++++-------- pandas/core/window/ewm.py | 4 +-- pandas/core/window/rolling.py | 12 ++++---- pandas/io/common.py | 4 +-- pandas/io/excel/_base.py | 4 +-- pandas/io/excel/_util.py | 4 +-- pandas/io/html.py | 2 +- pandas/io/json/_normalize.py | 2 +- pandas/io/parsers.py | 24 ++++++++-------- pandas/io/pytables.py | 34 +++++++++++------------ 52 files changed, 243 insertions(+), 217 deletions(-) diff --git a/doc/make.py b/doc/make.py index 024a748cd28ca..db729853e5834 100755 --- a/doc/make.py +++ b/doc/make.py @@ -83,8 +83,8 @@ def _process_single_doc(self, single_doc): obj = pandas # noqa: F821 for name in single_doc.split("."): obj = getattr(obj, name) - except AttributeError: - raise ImportError(f"Could not import {single_doc}") + except AttributeError as err: + raise ImportError(f"Could not import {single_doc}") from err else: return single_doc[len("pandas.") :] else: diff --git a/pandas/__init__.py b/pandas/__init__.py index 2d3d3f7d92a9c..2b9a461e0e95d 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -37,7 +37,7 @@ f"C extension: {module} not built. If you want to import " "pandas from the source directory, you may need to run " "'python setup.py build_ext --inplace --force' to build the C extensions first." 
- ) + ) from e from pandas._config import ( get_option, @@ -290,8 +290,8 @@ def __getattr__(self, item): try: return getattr(self.np, item) - except AttributeError: - raise AttributeError(f"module numpy has no attribute {item}") + except AttributeError as err: + raise AttributeError(f"module numpy has no attribute {item}") from err np = __numpy() @@ -306,8 +306,10 @@ def __getattr__(cls, item): try: return getattr(cls.datetime, item) - except AttributeError: - raise AttributeError(f"module datetime has no attribute {item}") + except AttributeError as err: + raise AttributeError( + f"module datetime has no attribute {item}" + ) from err def __instancecheck__(cls, other): return isinstance(other, cls.datetime) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index f1959cd70ed3a..df706bf25097e 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -213,8 +213,8 @@ def __getattr__(self, key: str): prefix += key try: v = object.__getattribute__(self, "d")[key] - except KeyError: - raise OptionError("No such option") + except KeyError as err: + raise OptionError("No such option") from err if isinstance(v, dict): return DictWrapper(v, prefix) else: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 02a979aea6c6b..7201629cb086e 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -686,8 +686,8 @@ def value_counts( values = Series(values) try: ii = cut(values, bins, include_lowest=True) - except TypeError: - raise TypeError("bins argument only works with numeric data.") + except TypeError as err: + raise TypeError("bins argument only works with numeric data.") from err # count, remove nulls (from the index), and but the bins result = ii.value_counts(dropna=dropna) diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index 20e4cf70eddcf..471bfa736d4b9 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -121,8 +121,8 @@ def _generate_range_overflow_safe( # we cannot salvage the operation by recursing, so raise try: addend = np.uint64(periods) * np.uint64(np.abs(stride)) - except FloatingPointError: - raise OutOfBoundsDatetime(msg) + except FloatingPointError as err: + raise OutOfBoundsDatetime(msg) from err if np.abs(addend) <= i64max: # relatively easy case without casting concerns diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index a5048e3aae899..4167c75eb5782 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -350,7 +350,7 @@ def __init__( if dtype.categories is None: try: codes, categories = factorize(values, sort=True) - except TypeError: + except TypeError as err: codes, categories = factorize(values, sort=False) if dtype.ordered: # raise, as we don't have a sortable data structure and so @@ -359,13 +359,13 @@ def __init__( "'values' is not ordered, please " "explicitly specify the categories order " "by passing in a categories argument." 
- ) - except ValueError: + ) from err + except ValueError as err: # FIXME raise NotImplementedError( "> 1 ndim Categorical are not supported at this time" - ) + ) from err # we're inferring from values dtype = CategoricalDtype(categories, dtype.ordered) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index a75536e46e60d..56939cda6d21c 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2080,11 +2080,11 @@ def _infer_tz_from_endpoints(start, end, tz): """ try: inferred_tz = timezones.infer_tzinfo(start, end) - except AssertionError: + except AssertionError as err: # infer_tzinfo raises AssertionError if passed mismatched timezones raise TypeError( "Start and end cannot both be tz-aware with different timezones" - ) + ) from err inferred_tz = timezones.maybe_get_tz(inferred_tz) tz = timezones.maybe_get_tz(tz) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index f1e0882def13b..e2b66b1a006e4 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -154,7 +154,7 @@ def safe_cast(values, dtype, copy: bool): """ try: return values.astype(dtype, casting="safe", copy=copy) - except TypeError: + except TypeError as err: casted = values.astype(dtype, copy=copy) if (casted == values).all(): @@ -162,7 +162,7 @@ def safe_cast(values, dtype, copy: bool): raise TypeError( f"cannot safely cast non-equivalent {values.dtype} to {np.dtype(dtype)}" - ) + ) from err def coerce_to_array( @@ -199,8 +199,8 @@ def coerce_to_array( if not issubclass(type(dtype), _IntegerDtype): try: dtype = _dtypes[str(np.dtype(dtype))] - except KeyError: - raise ValueError(f"invalid dtype specified {dtype}") + except KeyError as err: + raise ValueError(f"invalid dtype specified {dtype}") from err if isinstance(values, IntegerArray): values, mask = values._data, values._mask diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index f5167f470b056..51c94d5059f8b 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -448,12 +448,12 @@ def from_tuples(cls, data, closed="right", copy=False, dtype=None): try: # need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...] lhs, rhs = d - except ValueError: + except ValueError as err: msg = f"{name}.from_tuples requires tuples of length 2, got {d}" - raise ValueError(msg) - except TypeError: + raise ValueError(msg) from err + except TypeError as err: msg = f"{name}.from_tuples received an invalid item, {d}" - raise TypeError(msg) + raise TypeError(msg) from err left.append(lhs) right.append(rhs) @@ -538,10 +538,10 @@ def __setitem__(self, key, value): try: array = IntervalArray(value) value_left, value_right = array.left, array.right - except TypeError: + except TypeError as err: # wrong type: not interval or NA msg = f"'value' should be an interval type, got {type(value)} instead." - raise TypeError(msg) + raise TypeError(msg) from err key = check_array_indexer(self, key) # Need to ensure that left and right are updated atomically, so we're @@ -688,20 +688,20 @@ def astype(self, dtype, copy=True): try: new_left = self.left.astype(dtype.subtype) new_right = self.right.astype(dtype.subtype) - except TypeError: + except TypeError as err: msg = ( f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible" ) - raise TypeError(msg) + raise TypeError(msg) from err return self._shallow_copy(new_left, new_right) elif is_categorical_dtype(dtype): return Categorical(np.asarray(self)) # TODO: This try/except will be repeated. 
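        # for any other target dtype, fall back to plain ndarray conversion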
try: return np.asarray(self).astype(dtype, copy=copy) - except (TypeError, ValueError): + except (TypeError, ValueError) as err: msg = f"Cannot cast {type(self).__name__} to dtype {dtype}" - raise TypeError(msg) + raise TypeError(msg) from err @classmethod def _concat_same_type(cls, to_concat): @@ -1020,13 +1020,13 @@ def length(self): """ try: return self.right - self.left - except TypeError: + except TypeError as err: # length not defined for some types, e.g. string msg = ( "IntervalArray contains Intervals without defined length, " "e.g. Intervals with string endpoints" ) - raise TypeError(msg) + raise TypeError(msg) from err @property def mid(self): @@ -1100,11 +1100,11 @@ def __arrow_array__(self, type=None): try: subtype = pyarrow.from_numpy_dtype(self.dtype.subtype) - except TypeError: + except TypeError as err: raise TypeError( f"Conversion to arrow with subtype '{self.dtype.subtype}' " "is not supported" - ) + ) from err interval_type = ArrowIntervalType(subtype, self.closed) storage_array = pyarrow.StructArray.from_arrays( [ diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index 86869f50aab8e..135514e334920 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -217,8 +217,8 @@ def construct_from_string(cls, string: str) -> "SparseDtype": if string.startswith("Sparse"): try: sub_type, has_fill_value = cls._parse_subtype(string) - except ValueError: - raise TypeError(msg) + except ValueError as err: + raise TypeError(msg) from err else: result = SparseDtype(sub_type) msg = ( diff --git a/pandas/core/arrays/sparse/scipy_sparse.py b/pandas/core/arrays/sparse/scipy_sparse.py index e77256a5aaadd..eafd782dc9b9c 100644 --- a/pandas/core/arrays/sparse/scipy_sparse.py +++ b/pandas/core/arrays/sparse/scipy_sparse.py @@ -134,8 +134,10 @@ def _coo_to_sparse_series(A, dense_index: bool = False): try: s = Series(A.data, MultiIndex.from_arrays((A.row, A.col))) - except AttributeError: - raise TypeError(f"Expected coo_matrix. Got {type(A).__name__} instead.") + except AttributeError as err: + raise TypeError( + f"Expected coo_matrix. Got {type(A).__name__} instead." + ) from err s = s.sort_index() s = s.astype(SparseDtype(s.dtype)) if dense_index: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a7b16fd86468e..81fc934748d3e 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -451,10 +451,10 @@ def _addsub_object_array(self, other, op): # subclasses. 
Incompatible classes will raise AttributeError, # which we re-raise as TypeError return super()._addsub_object_array(other, op) - except AttributeError: + except AttributeError as err: raise TypeError( f"Cannot add/subtract non-tick DateOffset to {type(self).__name__}" - ) + ) from err def __mul__(self, other): other = lib.item_from_zerodim(other) diff --git a/pandas/core/base.py b/pandas/core/base.py index b9aeb32eea5c1..ff251324d4896 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -458,7 +458,7 @@ def is_any_frame() -> bool: # return a MI Series try: result = concat(result) - except TypeError: + except TypeError as err: # we want to give a nice error here if # we have non-same sized objects, so # we don't automatically broadcast @@ -467,7 +467,7 @@ def is_any_frame() -> bool: "cannot perform both aggregation " "and transformation operations " "simultaneously" - ) + ) from err return result, True @@ -553,7 +553,7 @@ def _aggregate_multiple_funcs(self, arg, _axis): try: return concat(results, keys=keys, axis=1, sort=False) - except TypeError: + except TypeError as err: # we are concatting non-NDFrame objects, # e.g. a list of scalars @@ -562,7 +562,9 @@ def _aggregate_multiple_funcs(self, arg, _axis): result = Series(results, index=keys, name=self.name) if is_nested_object(result): - raise ValueError("cannot combine transform and aggregation operations") + raise ValueError( + "cannot combine transform and aggregation operations" + ) from err return result def _get_cython_func(self, arg: str) -> Optional[str]: diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index f6947d5ec6233..fe3d3f49f16a7 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -362,8 +362,8 @@ def eval( if not inplace and first_expr: try: target = env.target.copy() - except AttributeError: - raise ValueError("Cannot return a copy of the target") + except AttributeError as err: + raise ValueError("Cannot return a copy of the target") from err else: target = env.target @@ -375,8 +375,8 @@ def eval( with warnings.catch_warnings(record=True): # TODO: Filter the warnings we actually care about here. 
target[assigner] = ret - except (TypeError, IndexError): - raise ValueError("Cannot assign expression output to target") + except (TypeError, IndexError) as err: + raise ValueError("Cannot assign expression output to target") from err if not resolvers: resolvers = ({assigner: ret},) diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index 7ed089b283903..bc9ff7c44b689 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -372,12 +372,12 @@ def __init__(self, op: str, lhs, rhs): try: self.func = _binary_ops_dict[op] - except KeyError: + except KeyError as err: # has to be made a list for python3 keys = list(_binary_ops_dict.keys()) raise ValueError( f"Invalid binary operator {repr(op)}, valid operators are {keys}" - ) + ) from err def __call__(self, env): """ @@ -550,11 +550,11 @@ def __init__(self, op: str, operand): try: self.func = _unary_ops_dict[op] - except KeyError: + except KeyError as err: raise ValueError( f"Invalid unary operator {repr(op)}, " f"valid operators are {_unary_ops_syms}" - ) + ) from err def __call__(self, env): operand = self.operand(env) diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py index 92a2c20cd2a9e..418fc7d38d08f 100644 --- a/pandas/core/computation/parsing.py +++ b/pandas/core/computation/parsing.py @@ -185,7 +185,7 @@ def tokenize_string(source: str) -> Iterator[Tuple[int, str]]: yield tokenize_backtick_quoted_string( token_generator, source, string_start=start[1] + 1 ) - except Exception: - raise SyntaxError(f"Failed to parse backticks in '{source}'.") + except Exception as err: + raise SyntaxError(f"Failed to parse backticks in '{source}'.") from err else: yield toknum, tokval diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 828ec11c2bd38..653d014775386 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -424,8 +424,10 @@ def visit_Subscript(self, node, **kwargs): try: return self.const_type(value[slobj], self.env) - except TypeError: - raise ValueError(f"cannot subscript {repr(value)} with {repr(slobj)}") + except TypeError as err: + raise ValueError( + f"cannot subscript {repr(value)} with {repr(slobj)}" + ) from err def visit_Attribute(self, node, **kwargs): attr = node.attr @@ -575,18 +577,18 @@ def evaluate(self): """ create and return the numexpr condition and filter """ try: self.condition = self.terms.prune(ConditionBinOp) - except AttributeError: + except AttributeError as err: raise ValueError( f"cannot process expression [{self.expr}], [{self}] " "is not a valid condition" - ) + ) from err try: self.filter = self.terms.prune(FilterBinOp) - except AttributeError: + except AttributeError as err: raise ValueError( f"cannot process expression [{self.expr}], [{self}] " "is not a valid filter" - ) + ) from err return self.condition, self.filter diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index 937c81fdeb8d6..83bf92ad737e4 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -197,11 +197,11 @@ def resolve(self, key: str, is_local: bool): # these are created when parsing indexing expressions # e.g., df[df > 0] return self.temps[key] - except KeyError: + except KeyError as err: # runtime import because ops imports from scope from pandas.core.computation.ops import UndefinedVariableError - raise UndefinedVariableError(key, is_local) + raise UndefinedVariableError(key, is_local) from err def swapkey(self, 
old_key: str, new_key: str, new_value=None): """ diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index c2b600b5d8c5b..c06bd8a1d6e36 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1573,11 +1573,11 @@ def maybe_cast_to_integer_array(arr, dtype, copy: bool = False): casted = np.array(arr, dtype=dtype, copy=copy) else: casted = arr.astype(dtype, copy=copy) - except OverflowError: + except OverflowError as err: raise OverflowError( "The elements provided in the data cannot all be " f"casted to the dtype {dtype}" - ) + ) from err if np.array_equal(arr, casted): return casted diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index c0420244f671e..df5bac1071985 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -198,8 +198,8 @@ def ensure_python_int(value: Union[int, np.integer]) -> int: try: new_value = int(value) assert new_value == value - except (TypeError, ValueError, AssertionError): - raise TypeError(f"Wrong type {type(value)} for value {value}") + except (TypeError, ValueError, AssertionError) as err: + raise TypeError(f"Wrong type {type(value)} for value {value}") from err return new_value @@ -1801,7 +1801,7 @@ def _validate_date_like_dtype(dtype) -> None: try: typ = np.datetime_data(dtype)[0] except ValueError as e: - raise TypeError(e) + raise TypeError(e) from e if typ != "generic" and typ != "ns": raise ValueError( f"{repr(dtype.name)} is too specific of a frequency, " @@ -1840,9 +1840,9 @@ def pandas_dtype(dtype) -> DtypeObj: # raise a consistent TypeError if failed try: npdtype = np.dtype(dtype) - except SyntaxError: + except SyntaxError as err: # np.dtype uses `eval` which can raise SyntaxError - raise TypeError(f"data type '{dtype}' not understood") + raise TypeError(f"data type '{dtype}' not understood") from err # Any invalid dtype (such as pd.Timestamp) should raise an error. # np.dtype(invalid_type).kind = 0 for such objects. However, this will diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 0730de934b56c..33daf6627721f 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1040,8 +1040,8 @@ def __new__(cls, subtype=None): try: subtype = pandas_dtype(subtype) - except TypeError: - raise TypeError("could not construct IntervalDtype") + except TypeError as err: + raise TypeError("could not construct IntervalDtype") from err if is_categorical_dtype(subtype) or is_string_dtype(subtype): # GH 19016 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6f5aef4884ccd..f304fadbab871 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2770,11 +2770,11 @@ def _ensure_valid_index(self, value): if not len(self.index) and is_list_like(value) and len(value): try: value = Series(value) - except (ValueError, NotImplementedError, TypeError): + except (ValueError, NotImplementedError, TypeError) as err: raise ValueError( "Cannot set a frame with no defined index " "and a value that cannot be converted to a Series" - ) + ) from err self._data = self._data.reindex_axis( value.index.copy(), axis=1, fill_value=np.nan @@ -3337,7 +3337,7 @@ def reindexer(value): # other raise TypeError( "incompatible index of inserted column with frame index" - ) + ) from err return value if isinstance(value, Series): @@ -4058,8 +4058,10 @@ def set_index( # everything else gets tried as a key; see GH 24969 try: found = col in self.columns - except TypeError: - raise TypeError(f"{err_msg}. 
Received column of type {type(col)}") + except TypeError as err: + raise TypeError( + f"{err_msg}. Received column of type {type(col)}" + ) from err else: if not found: missing.append(col) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ff7c481d550d4..25770c2c6470c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -335,9 +335,11 @@ def _construct_axes_from_arguments( if a not in kwargs: try: kwargs[a] = args.pop(0) - except IndexError: + except IndexError as err: if require_all: - raise TypeError("not enough/duplicate arguments specified!") + raise TypeError( + "not enough/duplicate arguments specified!" + ) from err axes = {a: kwargs.pop(a, sentinel) for a in cls._AXIS_ORDERS} return axes, kwargs @@ -4792,10 +4794,10 @@ def sample( if axis == 0: try: weights = self[weights] - except KeyError: + except KeyError as err: raise KeyError( "String passed to weights not a valid column" - ) + ) from err else: raise ValueError( "Strings can only be passed to " @@ -7521,8 +7523,8 @@ def at_time( index = self._get_axis(axis) try: indexer = index.indexer_at_time(time, asof=asof) - except AttributeError: - raise TypeError("Index must be DatetimeIndex") + except AttributeError as err: + raise TypeError("Index must be DatetimeIndex") from err return self._take_with_is_copy(indexer, axis=axis) @@ -7609,8 +7611,8 @@ def between_time( include_start=include_start, include_end=include_end, ) - except AttributeError: - raise TypeError("Index must be DatetimeIndex") + except AttributeError as err: + raise TypeError("Index must be DatetimeIndex") from err return self._take_with_is_copy(indexer, axis=axis) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 1bb512aee39e2..fb935c9065b83 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -572,8 +572,8 @@ def true_and_notna(x, *args, **kwargs) -> bool: indices = [ self._get_index(name) for name, group in self if true_and_notna(group) ] - except (ValueError, TypeError): - raise TypeError("the filter must return a boolean result") + except (ValueError, TypeError) as err: + raise TypeError("the filter must return a boolean result") from err filtered = self._apply_filter(indices, dropna) return filtered @@ -1371,9 +1371,9 @@ def _transform_general(self, func, *args, **kwargs): path, res = self._choose_path(fast_path, slow_path, group) except TypeError: return self._transform_item_by_item(obj, fast_path) - except ValueError: + except ValueError as err: msg = "transform must return a scalar value for each group" - raise ValueError(msg) + raise ValueError(msg) from err else: res = path(group) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f946f0e63a583..48c00140461b5 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -482,13 +482,13 @@ def get_converter(s): try: # If the original grouper was a tuple return [self.indices[name] for name in names] - except KeyError: + except KeyError as err: # turns out it wasn't a tuple msg = ( "must supply a same-length tuple to get_group " "with multiple grouping keys" ) - raise ValueError(msg) + raise ValueError(msg) from err converters = [get_converter(s) for s in index_sample] names = (tuple(f(n) for f, n in zip(converters, name)) for name in names) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index 5e53b061dd1c8..3858e750326b4 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -437,10 +437,10 @@ def check_array_indexer(array: 
AnyArrayLike, indexer: Any) -> Any: elif is_integer_dtype(dtype): try: indexer = np.asarray(indexer, dtype=np.intp) - except ValueError: + except ValueError as err: raise ValueError( "Cannot index with an integer indexer containing NA values" - ) + ) from err else: raise IndexError("arrays used as indices must be of integer or boolean type") diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5b674458e95ee..ae2387f0fd7b4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -670,8 +670,10 @@ def astype(self, dtype, copy=True): try: casted = self.values.astype(dtype, copy=copy) - except (TypeError, ValueError): - raise TypeError(f"Cannot cast {type(self).__name__} to dtype {dtype}") + except (TypeError, ValueError) as err: + raise TypeError( + f"Cannot cast {type(self).__name__} to dtype {dtype}" + ) from err return Index(casted, name=self.name, dtype=dtype) _index_shared_docs[ @@ -2860,8 +2862,8 @@ def get_loc(self, key, method=None, tolerance=None): casted_key = self._maybe_cast_indexer(key) try: return self._engine.get_loc(casted_key) - except KeyError: - raise KeyError(key) + except KeyError as err: + raise KeyError(key) from err if tolerance is not None: tolerance = self._convert_tolerance(tolerance, np.asarray(key)) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index d3038ae88652b..654d423b37c81 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -967,11 +967,11 @@ def insert(self, loc, item): ) arr = type(self._data)._simple_new(new_i8s, dtype=self.dtype, freq=freq) return type(self)._simple_new(arr, name=self.name) - except (AttributeError, TypeError): + except (AttributeError, TypeError) as err: # fall back to object index if isinstance(item, str): return self.astype(object).insert(loc, item) raise TypeError( f"cannot insert {type(self).__name__} with incompatible label" - ) + ) from err diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e303e487b1a7d..c9fefd46e55c7 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -552,8 +552,8 @@ def get_loc(self, key, method=None, tolerance=None): try: key = self._maybe_cast_for_get_loc(key) - except ValueError: - raise KeyError(key) + except ValueError as err: + raise KeyError(key) from err elif isinstance(key, timedelta): # GH#20464 @@ -574,8 +574,8 @@ def get_loc(self, key, method=None, tolerance=None): try: return Index.get_loc(self, key, method, tolerance) - except KeyError: - raise KeyError(orig_key) + except KeyError as err: + raise KeyError(orig_key) from err def _maybe_cast_for_get_loc(self, key) -> Timestamp: # needed to localize naive datetimes @@ -1040,9 +1040,9 @@ def bdate_range( try: weekmask = weekmask or "Mon Tue Wed Thu Fri" freq = prefix_mapping[freq](holidays=holidays, weekmask=weekmask) - except (KeyError, TypeError): + except (KeyError, TypeError) as err: msg = f"invalid custom frequency string: {freq}" - raise ValueError(msg) + raise ValueError(msg) from err elif holidays or weekmask: msg = ( "a custom frequency string is required when holidays or " diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index a7bb4237eab69..d396d1c76f357 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -724,9 +724,9 @@ def get_loc( op_right = le if self.closed_right else lt try: mask = op_left(self.left, key) & op_right(key, self.right) - except TypeError: + except TypeError as 
err: # scalar is not comparable to II subtype --> invalid label - raise KeyError(key) + raise KeyError(key) from err matches = mask.sum() if matches == 0: @@ -805,9 +805,9 @@ def get_indexer( loc = self.get_loc(key) except KeyError: loc = -1 - except InvalidIndexError: + except InvalidIndexError as err: # i.e. non-scalar key - raise TypeError(key) + raise TypeError(key) from err indexer.append(loc) return ensure_platform_int(indexer) @@ -1279,10 +1279,10 @@ def interval_range( if freq is not None and not is_number(freq): try: freq = to_offset(freq) - except ValueError: + except ValueError as err: raise ValueError( f"freq must be numeric or convertible to DateOffset, got {freq}" - ) + ) from err # verify type compatibility if not all( diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 4bd462e83a5bc..f70975e19b9a4 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1373,9 +1373,9 @@ def _get_level_number(self, level) -> int: ) try: level = self.names.index(level) - except ValueError: + except ValueError as err: if not is_integer(level): - raise KeyError(f"Level {level} not found") + raise KeyError(f"Level {level} not found") from err elif level < 0: level += self.nlevels if level < 0: @@ -1383,13 +1383,13 @@ def _get_level_number(self, level) -> int: raise IndexError( f"Too many levels: Index has only {self.nlevels} levels, " f"{orig_level} is not a valid level number" - ) + ) from err # Note: levels are zero-based elif level >= self.nlevels: raise IndexError( f"Too many levels: Index has only {self.nlevels} levels, " f"not {level + 1}" - ) + ) from err return level @property @@ -3370,8 +3370,8 @@ def _convert_can_do_setop(self, other): msg = "other must be a MultiIndex or a list of tuples" try: other = MultiIndex.from_tuples(other) - except TypeError: - raise TypeError(msg) + except TypeError as err: + raise TypeError(msg) from err else: result_names = self.names if self.names == other.names else None return other, result_names diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 35a5d99abf4e6..9eeb41f735015 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -515,9 +515,9 @@ def get_loc(self, key, method=None, tolerance=None): try: asdt, reso = parse_time_string(key, self.freq) - except DateParseError: + except DateParseError as err: # A string with invalid format - raise KeyError(f"Cannot interpret '{key}' as period") + raise KeyError(f"Cannot interpret '{key}' as period") from err grp = resolution.Resolution.get_freq_group(reso) freqn = resolution.get_freq_group(self.freq) @@ -540,14 +540,14 @@ def get_loc(self, key, method=None, tolerance=None): try: key = Period(key, freq=self.freq) - except ValueError: + except ValueError as err: # we cannot construct the Period - raise KeyError(orig_key) + raise KeyError(orig_key) from err try: return Index.get_loc(self, key, method, tolerance) - except KeyError: - raise KeyError(orig_key) + except KeyError as err: + raise KeyError(orig_key) from err def _maybe_cast_slice_bound(self, label, side: str, kind: str): """ @@ -578,10 +578,10 @@ def _maybe_cast_slice_bound(self, label, side: str, kind: str): parsed, reso = parse_time_string(label, self.freq) bounds = self._parsed_string_to_bounds(reso, parsed) return bounds[0 if side == "left" else 1] - except ValueError: + except ValueError as err: # string cannot be parsed as datetime-like # TODO: we need tests for this case - raise KeyError(label) + raise KeyError(label) from err elif 
is_integer(label) or is_float(label): self._invalid_indexer("slice", label) @@ -611,8 +611,8 @@ def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True try: return self._partial_date_slice(reso, parsed, use_lhs, use_rhs) - except KeyError: - raise KeyError(key) + except KeyError as err: + raise KeyError(key) from err def insert(self, loc, item): if not isinstance(item, Period) or self.freq != item.freq: diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 71cc62e6a110b..f621a3c153adf 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -349,8 +349,8 @@ def get_loc(self, key, method=None, tolerance=None): new_key = int(key) try: return self._range.index(new_key) - except ValueError: - raise KeyError(key) + except ValueError as err: + raise KeyError(key) from err raise KeyError(key) return super().get_loc(key, method=method, tolerance=tolerance) @@ -695,10 +695,10 @@ def __getitem__(self, key): new_key = int(key) try: return self._range[new_key] - except IndexError: + except IndexError as err: raise IndexError( f"index {key} is out of bounds for axis 0 with size {len(self)}" - ) + ) from err elif is_scalar(key): raise IndexError( "only integers, slices (`:`), " diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index b3b2bc46f6659..5e4a8e83bd95b 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -232,8 +232,8 @@ def get_loc(self, key, method=None, tolerance=None): elif isinstance(key, str): try: key = Timedelta(key) - except ValueError: - raise KeyError(key) + except ValueError as err: + raise KeyError(key) from err elif isinstance(key, self._data._recognized_scalars) or key is NaT: key = Timedelta(key) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a0e96ac169ff7..3ab180bafd156 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -609,7 +609,7 @@ def _get_setitem_indexer(self, key): # invalid indexer type vs 'other' indexing errors if "cannot do" in str(e): raise - raise IndexingError(key) + raise IndexingError(key) from e def __setitem__(self, key, value): if isinstance(key, tuple): @@ -653,11 +653,11 @@ def _has_valid_tuple(self, key: Tuple): raise IndexingError("Too many indexers") try: self._validate_key(k, i) - except ValueError: + except ValueError as err: raise ValueError( "Location based indexing can only have " f"[{self._valid_types}] types" - ) + ) from err def _is_nested_tuple_indexer(self, tup: Tuple) -> bool: """ @@ -1454,9 +1454,9 @@ def _get_list_axis(self, key, axis: int): """ try: return self.obj._take_with_is_copy(key, axis=axis) - except IndexError: + except IndexError as err: # re-raise with different error message - raise IndexError("positional indexers are out-of-bounds") + raise IndexError("positional indexers are out-of-bounds") from err def _getitem_axis(self, key, axis: int): if isinstance(key, slice): diff --git a/pandas/core/missing.py b/pandas/core/missing.py index b30a7a24f3495..c46aed999f45a 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -343,10 +343,10 @@ def _interpolate_scipy_wrapper( if method == "pchip": try: alt_methods["pchip"] = interpolate.pchip_interpolate - except AttributeError: + except AttributeError as err: raise ImportError( "Your version of Scipy does not support PCHIP interpolation." 
-            )
+            ) from err
     elif method == "akima":
         alt_methods["akima"] = _akima_interpolate
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index a5c609473760d..4398a1569ac56 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -73,7 +73,7 @@ def _f(*args, **kwargs):
                 # e.g. this is normally a disallowed function on
                 # object arrays that contain strings
                 if is_object_dtype(args[0]):
-                    raise TypeError(e)
+                    raise TypeError(e) from e
                 raise
 
     return _f
@@ -607,9 +607,9 @@ def get_median(x):
     if not is_float_dtype(values.dtype):
         try:
             values = values.astype("f8")
-        except ValueError:
+        except ValueError as err:
             # e.g. "could not convert string to float: 'a'"
-            raise TypeError
+            raise TypeError from err
     if mask is not None:
         values[mask] = np.nan
@@ -1361,9 +1361,9 @@ def _ensure_numeric(x):
         except (TypeError, ValueError):
             try:
                 x = x.astype(np.float64)
-            except ValueError:
+            except ValueError as err:
                 # GH#29941 we get here with object arrays containing strs
-                raise TypeError(f"Could not convert {x} to numeric")
+                raise TypeError(f"Could not convert {x} to numeric") from err
             else:
                 if not np.any(np.imag(x)):
                     x = x.real
@@ -1374,9 +1374,9 @@ def _ensure_numeric(x):
             # e.g. "1+1j" or "foo"
             try:
                 x = complex(x)
-            except ValueError:
+            except ValueError as err:
                 # e.g. "foo"
-                raise TypeError(f"Could not convert {x} to numeric")
+                raise TypeError(f"Could not convert {x} to numeric") from err
     return x
diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py
index b216a927f65b3..2c9105c52cf9b 100644
--- a/pandas/core/ops/array_ops.py
+++ b/pandas/core/ops/array_ops.py
@@ -295,12 +295,12 @@ def na_logical_op(x: np.ndarray, y, op):
                 AttributeError,
                 OverflowError,
                 NotImplementedError,
-            ):
+            ) as err:
                 typ = type(y).__name__
                 raise TypeError(
                     f"Cannot perform '{op.__name__}' with a dtyped [{x.dtype}] array "
                     f"and scalar of type [{typ}]"
-                )
+                ) from err
 
     return result.reshape(x.shape)
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index d9f21f0b274ac..06a180d4a096e 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -620,8 +620,8 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
             for key, index in zip(hlevel, indexes):
                 try:
                     i = level.get_loc(key)
-                except KeyError:
-                    raise ValueError(f"Key {key} not in level {level}")
+                except KeyError as err:
+                    raise ValueError(f"Key {key} not in level {level}") from err
 
                 to_concat.append(np.repeat(i, len(index)))
             codes_list.append(np.concatenate(to_concat))
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index b04e4e1ac4d48..61aa34f724307 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -631,8 +631,8 @@ def _normalize(table, normalize, margins: bool, margins_name="All"):
         axis_subs = {0: "index", 1: "columns"}
         try:
             normalize = axis_subs[normalize]
-        except KeyError:
-            raise ValueError("Not a valid normalize argument")
+        except KeyError as err:
+            raise ValueError("Not a valid normalize argument") from err
 
     if margins is False:
 
@@ -647,8 +647,8 @@ def _normalize(table, normalize, margins: bool, margins_name="All"):
 
         try:
             f = normalizers[normalize]
-        except KeyError:
-            raise ValueError("Not a valid normalize argument")
+        except KeyError as err:
+            raise ValueError("Not a valid normalize argument") from err
 
         table = f(table)
         table = table.fillna(0)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 3ded02598963c..d984225f8fd89 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -992,11 +992,11 @@ def __setitem__(self, key, value):
         except TypeError as e:
             if isinstance(key, tuple) and not isinstance(self.index, MultiIndex):
-                raise ValueError("Can only tuple-index with a MultiIndex")
+                raise ValueError("Can only tuple-index with a MultiIndex") from e
 
             # python 3 type errors should be raised
             if _is_unorderable_exception(e):
-                raise IndexError(key)
+                raise IndexError(key) from e
 
             if com.is_bool_indexer(key):
                 key = check_bool_indexer(self.index, key)
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 4b0fc3e47356c..b0c5d6a48d99a 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -2425,12 +2425,12 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
         try:
             # turn anything in "others" into lists of Series
             others = self._get_series_list(others)
-        except ValueError:  # do not catch TypeError raised by _get_series_list
+        except ValueError as err:  # do not catch TypeError raised by _get_series_list
             raise ValueError(
                 "If `others` contains arrays or lists (or other "
                 "list-likes without an index), these must all be "
                 "of the same length as the calling Series/Index."
-            )
+            ) from err
 
         # align if required
         if any(not data.index.equals(x.index) for x in others):
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index b10b736b9134e..5580146b37d25 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -391,8 +391,10 @@ def _convert_listlike_datetimes(
                 # datetime64[ns]
                 orig_arg = ensure_object(orig_arg)
                 result = _attempt_YYYYMMDD(orig_arg, errors=errors)
-            except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
-                raise ValueError("cannot convert the input to '%Y%m%d' date format")
+            except (ValueError, TypeError, tslibs.OutOfBoundsDatetime) as err:
+                raise ValueError(
+                    "cannot convert the input to '%Y%m%d' date format"
+                ) from err
 
         # fallback
         if result is None:
@@ -484,8 +486,10 @@ def _adjust_to_origin(arg, origin, unit):
             raise ValueError("unit must be 'D' for origin='julian'")
         try:
             arg = arg - j0
-        except TypeError:
-            raise ValueError("incompatible 'arg' type for given 'origin'='julian'")
+        except TypeError as err:
+            raise ValueError(
+                "incompatible 'arg' type for given 'origin'='julian'"
+            ) from err
 
         # preemptively check this for a nice range
         j_max = Timestamp.max.to_julian_date() - j0
@@ -508,10 +512,14 @@ def _adjust_to_origin(arg, origin, unit):
         # we are going to offset back to unix / epoch time
         try:
             offset = Timestamp(origin)
-        except tslibs.OutOfBoundsDatetime:
-            raise tslibs.OutOfBoundsDatetime(f"origin {origin} is Out of Bounds")
-        except ValueError:
-            raise ValueError(f"origin {origin} cannot be converted to a Timestamp")
+        except tslibs.OutOfBoundsDatetime as err:
+            raise tslibs.OutOfBoundsDatetime(
+                f"origin {origin} is Out of Bounds"
+            ) from err
+        except ValueError as err:
+            raise ValueError(
+                f"origin {origin} cannot be converted to a Timestamp"
+            ) from err
 
         if offset.tz is not None:
             raise ValueError(f"origin offset {offset} must be tz-naive")
@@ -861,7 +869,7 @@ def coerce(values):
     try:
         values = to_datetime(values, format="%Y%m%d", errors=errors, utc=tz)
     except (TypeError, ValueError) as err:
-        raise ValueError(f"cannot assemble the datetimes: {err}")
+        raise ValueError(f"cannot assemble the datetimes: {err}") from err
 
     for u in ["h", "m", "s", "ms", "us", "ns"]:
         value = unit_rev.get(u)
@@ -869,7 +877,9 @@ def coerce(values):
             try:
                 values += to_timedelta(coerce(arg[value]), unit=u, errors=errors)
             except (TypeError, ValueError) as err:
-                raise ValueError(f"cannot assemble the datetimes [{value}]: {err}")
+                raise ValueError(
+                    f"cannot assemble the datetimes [{value}]: {err}"
+                ) from err
     return values
 
@@ -1001,13 +1011,13 @@ def _convert_listlike(arg, format):
         for element in arg:
             try:
                 times.append(datetime.strptime(element, format).time())
-            except (ValueError, TypeError):
+            except (ValueError, TypeError) as err:
                 if errors == "raise":
                     msg = (
                         f"Cannot convert {element} to a time with given "
                         f"format {format}"
                     )
-                    raise ValueError(msg)
+                    raise ValueError(msg) from err
                 elif errors == "ignore":
                     return arg
                 else:
diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py
index e045d1c2211d7..c6096c24ecbc9 100644
--- a/pandas/core/window/ewm.py
+++ b/pandas/core/window/ewm.py
@@ -219,13 +219,13 @@ def _apply(self, func, **kwargs):
 
             try:
                 values = self._prep_values(b.values)
-            except (TypeError, NotImplementedError):
+            except (TypeError, NotImplementedError) as err:
                 if isinstance(obj, ABCDataFrame):
                     exclude.extend(b.columns)
                     del block_list[i]
                     continue
                 else:
-                    raise DataError("No numeric types to aggregate")
+                    raise DataError("No numeric types to aggregate") from err
 
             if values.size == 0:
                 results.append(values.copy())
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 65ac064a1322e..3784989de10ab 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -267,8 +267,8 @@ def _prep_values(self, values: Optional[np.ndarray] = None) -> np.ndarray:
         else:
             try:
                 values = ensure_float64(values)
-            except (ValueError, TypeError):
-                raise TypeError(f"cannot handle this type -> {values.dtype}")
+            except (ValueError, TypeError) as err:
+                raise TypeError(f"cannot handle this type -> {values.dtype}") from err
 
         # Convert inf to nan for C funcs
         inf = np.isinf(values)
@@ -449,13 +449,13 @@ def _apply(
 
             try:
                 values = self._prep_values(b.values)
-            except (TypeError, NotImplementedError):
+            except (TypeError, NotImplementedError) as err:
                 if isinstance(obj, ABCDataFrame):
                     exclude.extend(b.columns)
                     del block_list[i]
                     continue
                 else:
-                    raise DataError("No numeric types to aggregate")
+                    raise DataError("No numeric types to aggregate") from err
 
             if values.size == 0:
                 results.append(values.copy())
@@ -1875,11 +1875,11 @@ def _validate_freq(self):
 
         try:
             return to_offset(self.window)
-        except (TypeError, ValueError):
+        except (TypeError, ValueError) as err:
             raise ValueError(
                 f"passed window {self.window} is not "
                 "compatible with a datetimelike index"
-            )
+            ) from err
 
     _agg_see_also_doc = dedent(
         """
diff --git a/pandas/io/common.py b/pandas/io/common.py
index c52583eed27ec..0fce8f5382686 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -265,8 +265,8 @@ def get_compression_method(
         compression_args = dict(compression)
         try:
             compression = compression_args.pop("method")
-        except KeyError:
-            raise ValueError("If mapping, compression must have key 'method'")
+        except KeyError as err:
+            raise ValueError("If mapping, compression must have key 'method'") from err
     else:
         compression_args = {}
     return compression, compression_args
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 97959bd125113..d2f9dd285582f 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -628,8 +628,8 @@ def __new__(cls, path, engine=None, **kwargs):
                 engine = config.get_option(f"io.excel.{ext}.writer")
                 if engine == "auto":
                     engine = _get_default_writer(ext)
-            except KeyError:
-                raise ValueError(f"No engine for filetype: '{ext}'")
+            except KeyError as err:
+                raise ValueError(f"No engine for filetype: '{ext}'") from err
         cls = get_writer(engine)
 
         return object.__new__(cls)
diff --git a/pandas/io/excel/_util.py b/pandas/io/excel/_util.py
index c8d40d7141fc8..7c8e1abb497bc 100644
--- a/pandas/io/excel/_util.py
+++ b/pandas/io/excel/_util.py
@@ -47,8 +47,8 @@ def _get_default_writer(ext):
 def get_writer(engine_name):
     try:
         return _writers[engine_name]
-    except KeyError:
-        raise ValueError(f"No Excel writer '{engine_name}'")
+    except KeyError as err:
+        raise ValueError(f"No Excel writer '{engine_name}'") from err
 
 
 def _excel2num(x):
diff --git a/pandas/io/html.py b/pandas/io/html.py
index 561570f466b68..9efdacadce83e 100644
--- a/pandas/io/html.py
+++ b/pandas/io/html.py
@@ -904,7 +904,7 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):
                     "Since you passed a non-rewindable file "
                     "object, we can't rewind it to try "
                     "another parser. Try read_html() with a different flavor."
-                )
+                ) from caught
 
             retained = caught
         else:
diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py
index f158ad6cd89e3..4b153d3cb69bf 100644
--- a/pandas/io/json/_normalize.py
+++ b/pandas/io/json/_normalize.py
@@ -315,7 +315,7 @@ def _recursive_extract(data, path, seen_meta, level=0):
                         raise KeyError(
                             "Try running with errors='ignore' as key "
                             f"{e} is not always present"
-                        )
+                        ) from e
                     meta_vals[key].append(meta_val)
             records.extend(recs)
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 8a3ad6cb45b57..9d1687e20a949 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -814,8 +814,10 @@ def __init__(self, f, engine=None, **kwds):
             ):
                 try:
                     dialect_val = getattr(dialect, param)
-                except AttributeError:
-                    raise ValueError(f"Invalid dialect {kwds['dialect']} provided")
+                except AttributeError as err:
+                    raise ValueError(
+                        f"Invalid dialect {kwds['dialect']} provided"
+                    ) from err
                 parser_default = _parser_defaults[param]
                 provided = kwds.get(param, parser_default)
@@ -1816,19 +1818,19 @@ def _cast_types(self, values, cast_type, column):
             array_type = cast_type.construct_array_type()
             try:
                 return array_type._from_sequence_of_strings(values, dtype=cast_type)
-            except NotImplementedError:
+            except NotImplementedError as err:
                 raise NotImplementedError(
                     f"Extension Array: {array_type} must implement "
                     "_from_sequence_of_strings in order to be used in parser methods"
-                )
+                ) from err
         else:
             try:
                 values = astype_nansafe(values, cast_type, copy=True, skipna=True)
-            except ValueError:
+            except ValueError as err:
                 raise ValueError(
                     f"Unable to convert column {column} to type {cast_type}"
-                )
+                ) from err
         return values
 
     def _do_date_conversions(self, names, data):
@@ -2552,12 +2554,12 @@ def _infer_columns(self):
                     while self.line_pos <= hr:
                         line = self._next_line()
-                except StopIteration:
+                except StopIteration as err:
                     if self.line_pos < hr:
                         raise ValueError(
                             f"Passed header={hr} but only {self.line_pos + 1} lines in "
                             "file"
-                        )
+                        ) from err
 
                     # We have an empty file, so check
                     # if columns are provided. That will
@@ -2569,7 +2571,7 @@ def _infer_columns(self):
                         return columns, num_original_columns, unnamed_cols
 
                     if not self.names:
-                        raise EmptyDataError("No columns to parse from file")
+                        raise EmptyDataError("No columns to parse from file") from err
 
                     line = self.names[:]
 
@@ -2650,9 +2652,9 @@ def _infer_columns(self):
             try:
                 line = self._buffered_line()
-            except StopIteration:
+            except StopIteration as err:
                 if not names:
-                    raise EmptyDataError("No columns to parse from file")
+                    raise EmptyDataError("No columns to parse from file") from err
 
                 line = names[:]
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 048aa8b1915d1..168666ea21f45 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -682,7 +682,7 @@ def open(self, mode: str = "a", **kwargs):
             # trying to read from a non-existent file causes an error which
             # is not part of IOError, make it one
             if self._mode == "r" and "Unable to open/create file" in str(err):
-                raise IOError(str(err))
+                raise IOError(str(err)) from err
             raise
 
     def close(self):
@@ -1069,14 +1069,14 @@ def remove(self, key: str, where=None, start=None, stop=None):
         except AssertionError:
             # surface any assertion errors for e.g. debugging
             raise
-        except Exception:
+        except Exception as err:
             # In tests we get here with ClosedFileError, TypeError, and
             # _table_mod.NoSuchNodeError.  TODO: Catch only these?
 
             if where is not None:
                 raise ValueError(
                     "trying to remove a node with a non-None where clause!"
-                )
+                ) from err
 
             # we are actually trying to remove a node (with children)
             node = self.get_node(key)
@@ -1521,8 +1521,8 @@ def _validate_format(self, format: str) -> str:
         # validate
         try:
             format = _FORMAT_MAP[format.lower()]
-        except KeyError:
-            raise TypeError(f"invalid HDFStore format specified [{format}]")
+        except KeyError as err:
+            raise TypeError(f"invalid HDFStore format specified [{format}]") from err
 
         return format
 
@@ -1579,8 +1579,8 @@ def error(t):
             _STORER_MAP = {"series": SeriesFixed, "frame": FrameFixed}
             try:
                 cls = _STORER_MAP[pt]
-            except KeyError:
-                raise error("_STORER_MAP")
+            except KeyError as err:
+                raise error("_STORER_MAP") from err
             return cls(self, group, encoding=encoding, errors=errors)
 
         # existing node (and must be a table)
@@ -1614,8 +1614,8 @@ def error(t):
         }
         try:
             cls = _TABLE_MAP[tt]
-        except KeyError:
-            raise error("_TABLE_MAP")
+        except KeyError as err:
+            raise error("_TABLE_MAP") from err
 
         return cls(self, group, encoding=encoding, errors=errors)
@@ -3233,10 +3233,10 @@ def validate_multiindex(self, obj):
         ]
         try:
             return obj.reset_index(), levels
-        except ValueError:
+        except ValueError as err:
             raise ValueError(
                 "duplicate names/columns in the multi-index when storing as a table"
-            )
+            ) from err
 
     @property
     def nrows_expected(self) -> int:
@@ -3784,11 +3784,11 @@ def get_blk_items(mgr, blocks):
             if table_exists and validate:
                 try:
                     existing_col = self.values_axes[i]
-                except (IndexError, KeyError):
+                except (IndexError, KeyError) as err:
                     raise ValueError(
                         f"Incompatible appended table [{blocks}]"
                         f"with existing table [{self.values_axes}]"
-                    )
+                    ) from err
             else:
                 existing_col = None
 
@@ -3899,12 +3899,12 @@ def get_blk_items(mgr, blocks):
                     b, b_items = by_items.pop(items)
                     new_blocks.append(b)
                     new_blk_items.append(b_items)
-                except (IndexError, KeyError):
+                except (IndexError, KeyError) as err:
                     jitems = ",".join(pprint_thing(item) for item in items)
                     raise ValueError(
                         f"cannot match existing table structure for [{jitems}] "
                         "on appending data"
-                    )
+                    ) from err
             blocks = new_blocks
             blk_items = new_blk_items
 
@@ -5061,7 +5061,7 @@ def generate(self, where):
         q = self.table.queryables()
         try:
             return PyTablesExpr(where, queryables=q, encoding=self.table.encoding)
-        except NameError:
+        except NameError as err:
            # raise a nice message, suggesting that the user should use
            # data_columns
            qkeys = ",".join(q.keys())
@@ -5073,7 +5073,7 @@ def generate(self, where):
                 "            an axis (e.g. 'index' or 'columns'), or a "
                 "data_column\n"
                 f"            The currently defined references are: {qkeys}\n"
-            )
+            ) from err
 
     def select(self):
         """

From c20528e668a14d4eadb4d514c43273d22753247e Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Fri, 28 Feb 2020 02:55:09 -0800
Subject: [PATCH 189/388] REF: test_first_valid_index (#32317)

---
 pandas/tests/frame/test_timeseries.py         | 62 +------------
 pandas/tests/generic/methods/__init__.py      |  3 +
 .../generic/methods/test_first_valid_index.py | 90 +++++++++++++++++++
 pandas/tests/series/test_timeseries.py        | 33 -------
 4 files changed, 94 insertions(+), 94 deletions(-)
 create mode 100644 pandas/tests/generic/methods/__init__.py
 create mode 100644 pandas/tests/generic/methods/test_first_valid_index.py

diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py
index 5956f73bb11f0..d8eeed69d66e2 100644
--- a/pandas/tests/frame/test_timeseries.py
+++ b/pandas/tests/frame/test_timeseries.py
@@ -1,8 +1,7 @@
 import numpy as np
-import pytest
 
 import pandas as pd
-from pandas import DataFrame, Series, date_range, to_datetime
+from pandas import DataFrame, date_range, to_datetime
 import pandas._testing as tm
 
 
@@ -60,65 +59,6 @@ def test_frame_append_datetime64_col_other_units(self):
 
         assert (tmp["dates"].values == ex_vals).all()
 
-    @pytest.mark.parametrize(
-        "data,idx,expected_first,expected_last",
-        [
-            ({"A": [1, 2, 3]}, [1, 1, 2], 1, 2),
-            ({"A": [1, 2, 3]}, [1, 2, 2], 1, 2),
-            ({"A": [1, 2, 3, 4]}, ["d", "d", "d", "d"], "d", "d"),
-            ({"A": [1, np.nan, 3]}, [1, 1, 2], 1, 2),
-            ({"A": [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2),
-            ({"A": [1, np.nan, 3]}, [1, 2, 2], 1, 2),
-        ],
-    )
-    def test_first_last_valid(
-        self, float_frame, data, idx, expected_first, expected_last
-    ):
-        N = len(float_frame.index)
-        mat = np.random.randn(N)
-        mat[:5] = np.nan
-        mat[-5:] = np.nan
-
-        frame = DataFrame({"foo": mat}, index=float_frame.index)
-        index = frame.first_valid_index()
-
-        assert index == frame.index[5]
-
-        index = frame.last_valid_index()
-        assert index == frame.index[-6]
-
-        # GH12800
-        empty = DataFrame()
-        assert empty.last_valid_index() is None
-        assert empty.first_valid_index() is None
-
-        # GH17400: no valid entries
-        frame[:] = np.nan
-        assert frame.last_valid_index() is None
-        assert frame.first_valid_index() is None
-
-        # GH20499: its preserves freq with holes
-        frame.index = date_range("20110101", periods=N, freq="B")
-        frame.iloc[1] = 1
-        frame.iloc[-2] = 1
-        assert frame.first_valid_index() == frame.index[1]
-        assert frame.last_valid_index() == frame.index[-2]
-        assert frame.first_valid_index().freq == frame.index.freq
-        assert frame.last_valid_index().freq == frame.index.freq
-
-        # GH 21441
-        df = DataFrame(data, index=idx)
-        assert expected_first == df.first_valid_index()
-        assert expected_last == df.last_valid_index()
-
-    @pytest.mark.parametrize("klass", [Series, DataFrame])
-    def test_first_valid_index_all_nan(self, klass):
-        # GH#9752 Series/DataFrame should both return None, not raise
-        obj = klass([np.nan])
-
-        assert obj.first_valid_index() is None
-        assert obj.iloc[:0].first_valid_index() is None
-
     def test_operation_on_NaT(self):
         # Both NaT and Timestamp are in DataFrame.
df = pd.DataFrame({"foo": [pd.NaT, pd.NaT, pd.Timestamp("2012-05-01")]}) diff --git a/pandas/tests/generic/methods/__init__.py b/pandas/tests/generic/methods/__init__.py new file mode 100644 index 0000000000000..5d18f97b8a55e --- /dev/null +++ b/pandas/tests/generic/methods/__init__.py @@ -0,0 +1,3 @@ +""" +Tests for methods shared by DataFrame and Series. +""" diff --git a/pandas/tests/generic/methods/test_first_valid_index.py b/pandas/tests/generic/methods/test_first_valid_index.py new file mode 100644 index 0000000000000..bca3452c3c458 --- /dev/null +++ b/pandas/tests/generic/methods/test_first_valid_index.py @@ -0,0 +1,90 @@ +""" +Includes test for last_valid_index. +""" +import numpy as np +import pytest + +from pandas import DataFrame, Series, date_range +import pandas._testing as tm + + +class TestFirstValidIndex: + @pytest.mark.parametrize("klass", [Series, DataFrame]) + def test_first_valid_index_single_nan(self, klass): + # GH#9752 Series/DataFrame should both return None, not raise + obj = klass([np.nan]) + + assert obj.first_valid_index() is None + assert obj.iloc[:0].first_valid_index() is None + + @pytest.mark.parametrize( + "empty", [DataFrame(), Series(dtype=object), Series([], index=[], dtype=object)] + ) + def test_first_valid_index_empty(self, empty): + # GH#12800 + assert empty.last_valid_index() is None + assert empty.first_valid_index() is None + + @pytest.mark.parametrize( + "data,idx,expected_first,expected_last", + [ + ({"A": [1, 2, 3]}, [1, 1, 2], 1, 2), + ({"A": [1, 2, 3]}, [1, 2, 2], 1, 2), + ({"A": [1, 2, 3, 4]}, ["d", "d", "d", "d"], "d", "d"), + ({"A": [1, np.nan, 3]}, [1, 1, 2], 1, 2), + ({"A": [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2), + ({"A": [1, np.nan, 3]}, [1, 2, 2], 1, 2), + ], + ) + def test_first_last_valid_frame(self, data, idx, expected_first, expected_last): + # GH#21441 + df = DataFrame(data, index=idx) + assert expected_first == df.first_valid_index() + assert expected_last == df.last_valid_index() + + @pytest.mark.parametrize("index_func", [tm.makeStringIndex, tm.makeDateIndex]) + def test_first_last_valid(self, index_func): + N = 30 + index = index_func(N) + mat = np.random.randn(N) + mat[:5] = np.nan + mat[-5:] = np.nan + + frame = DataFrame({"foo": mat}, index=index) + assert frame.first_valid_index() == frame.index[5] + assert frame.last_valid_index() == frame.index[-6] + + ser = frame["foo"] + assert ser.first_valid_index() == frame.index[5] + assert ser.last_valid_index() == frame.index[-6] + + @pytest.mark.parametrize("index_func", [tm.makeStringIndex, tm.makeDateIndex]) + def test_first_last_valid_all_nan(self, index_func): + # GH#17400: no valid entries + index = index_func(30) + frame = DataFrame(np.nan, columns=["foo"], index=index) + + assert frame.last_valid_index() is None + assert frame.first_valid_index() is None + + ser = frame["foo"] + assert ser.first_valid_index() is None + assert ser.last_valid_index() is None + + def test_first_last_valid_preserves_freq(self): + # GH#20499: its preserves freq with holes + index = date_range("20110101", periods=30, freq="B") + frame = DataFrame(np.nan, columns=["foo"], index=index) + + frame.iloc[1] = 1 + frame.iloc[-2] = 1 + assert frame.first_valid_index() == frame.index[1] + assert frame.last_valid_index() == frame.index[-2] + assert frame.first_valid_index().freq == frame.index.freq + assert frame.last_valid_index().freq == frame.index.freq + + ts = frame["foo"] + assert ts.first_valid_index() == ts.index[1] + assert ts.last_valid_index() == ts.index[-2] + assert 
ts.first_valid_index().freq == ts.index.freq + assert ts.last_valid_index().freq == ts.index.freq diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index c4b2e2edd845a..9796a32532b99 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -48,39 +48,6 @@ def test_autocorr(self, datetime_series): else: assert corr1 == corr2 - def test_first_last_valid(self, datetime_series): - ts = datetime_series.copy() - ts[:5] = np.NaN - - index = ts.first_valid_index() - assert index == ts.index[5] - - ts[-5:] = np.NaN - index = ts.last_valid_index() - assert index == ts.index[-6] - - ts[:] = np.nan - assert ts.last_valid_index() is None - assert ts.first_valid_index() is None - - ser = Series([], index=[], dtype=object) - assert ser.last_valid_index() is None - assert ser.first_valid_index() is None - - # GH12800 - empty = Series(dtype=object) - assert empty.last_valid_index() is None - assert empty.first_valid_index() is None - - # GH20499: its preserves freq with holes - ts.index = date_range("20110101", periods=len(ts), freq="B") - ts.iloc[1] = 1 - ts.iloc[-2] = 1 - assert ts.first_valid_index() == ts.index[1] - assert ts.last_valid_index() == ts.index[-2] - assert ts.first_valid_index().freq == ts.index.freq - assert ts.last_valid_index().freq == ts.index.freq - def test_mpl_compat_hack(self, datetime_series): # This is currently failing because the test was relying on From 77ee2bfc254f591058716b26dbc6c07e61c10002 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 28 Feb 2020 03:47:12 -0800 Subject: [PATCH 190/388] REF/TST: misplaced MultiIndex tests (#32314) --- .../tests/indexes/multi/test_constructors.py | 75 +++++- pandas/tests/indexes/multi/test_duplicates.py | 26 ++ pandas/tests/indexes/multi/test_format.py | 14 +- pandas/tests/indexes/multi/test_indexing.py | 38 +++ pandas/tests/indexes/multi/test_lexsort.py | 46 ++++ pandas/tests/indexes/multi/test_missing.py | 10 + pandas/tests/indexes/multi/test_reshape.py | 50 ++++ pandas/tests/test_multilevel.py | 243 +----------------- 8 files changed, 258 insertions(+), 244 deletions(-) create mode 100644 pandas/tests/indexes/multi/test_lexsort.py diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 2c4b3ce04f96d..1157c7f8bb962 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -1,3 +1,6 @@ +from datetime import date, datetime +import itertools + import numpy as np import pytest @@ -6,7 +9,7 @@ from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike import pandas as pd -from pandas import Index, MultiIndex, date_range +from pandas import Index, MultiIndex, Series, date_range import pandas._testing as tm @@ -723,3 +726,73 @@ def test_index_equal_empty_iterable(): a = MultiIndex(levels=[[], []], codes=[[], []], names=["a", "b"]) b = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"]) tm.assert_index_equal(a, b) + + +def test_raise_invalid_sortorder(): + # Test that the MultiIndex constructor raise when a incorrect sortorder is given + # GH#28518 + + levels = [[0, 1], [0, 1, 2]] + + # Correct sortorder + MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 + ) + + with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"): + MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=2, + ) + + with pytest.raises(ValueError, 
match=r".* sortorder 1 with lexsort_depth 0.*"): + MultiIndex( + levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1, + ) + + +def test_datetimeindex(): + idx1 = pd.DatetimeIndex( + ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo", + ) + idx2 = pd.date_range("2010/01/01", periods=6, freq="M", tz="US/Eastern") + idx = MultiIndex.from_arrays([idx1, idx2]) + + expected1 = pd.DatetimeIndex( + ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo" + ) + + tm.assert_index_equal(idx.levels[0], expected1) + tm.assert_index_equal(idx.levels[1], idx2) + + # from datetime combos + # GH 7888 + date1 = date.today() + date2 = datetime.today() + date3 = Timestamp.today() + + for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]): + index = MultiIndex.from_product([[d1], [d2]]) + assert isinstance(index.levels[0], pd.DatetimeIndex) + assert isinstance(index.levels[1], pd.DatetimeIndex) + + +def test_constructor_with_tz(): + + index = pd.DatetimeIndex( + ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific" + ) + columns = pd.DatetimeIndex( + ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo" + ) + + result = MultiIndex.from_arrays([index, columns]) + + assert result.names == ["dt1", "dt2"] + tm.assert_index_equal(result.levels[0], index) + tm.assert_index_equal(result.levels[1], columns) + + result = MultiIndex.from_arrays([Series(index), Series(columns)]) + + assert result.names == ["dt1", "dt2"] + tm.assert_index_equal(result.levels[0], index) + tm.assert_index_equal(result.levels[1], columns) diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index 93e1de535835f..5e17a19335c7e 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -274,3 +274,29 @@ def test_duplicated2(): tm.assert_numpy_array_equal( mi.duplicated(), np.zeros(len(mi), dtype="bool") ) + + +def test_duplicated_drop_duplicates(): + # GH#4060 + idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2])) + + expected = np.array([False, False, False, True, False, False], dtype=bool) + duplicated = idx.duplicated() + tm.assert_numpy_array_equal(duplicated, expected) + assert duplicated.dtype == bool + expected = MultiIndex.from_arrays(([1, 2, 3, 2, 3], [1, 1, 1, 2, 2])) + tm.assert_index_equal(idx.drop_duplicates(), expected) + + expected = np.array([True, False, False, False, False, False]) + duplicated = idx.duplicated(keep="last") + tm.assert_numpy_array_equal(duplicated, expected) + assert duplicated.dtype == bool + expected = MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2])) + tm.assert_index_equal(idx.drop_duplicates(keep="last"), expected) + + expected = np.array([True, False, False, True, False, False]) + duplicated = idx.duplicated(keep=False) + tm.assert_numpy_array_equal(duplicated, expected) + assert duplicated.dtype == bool + expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2])) + tm.assert_index_equal(idx.drop_duplicates(keep=False), expected) diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py index 75f23fb2f32ba..75499bd79cca0 100644 --- a/pandas/tests/indexes/multi/test_format.py +++ b/pandas/tests/indexes/multi/test_format.py @@ -1,9 +1,10 @@ import warnings +import numpy as np import pytest import pandas as pd -from pandas import MultiIndex +from pandas import Index, MultiIndex import pandas._testing as tm 
@@ -76,6 +77,17 @@ def test_repr_max_seq_item_setting(idx):
 
 
 class TestRepr:
+    def test_unicode_repr_issues(self):
+        levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])]
+        codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)]
+        index = MultiIndex(levels=levels, codes=codes)
+
+        repr(index.levels)
+
+        # FIXME: don't leave commented-out
+        # NumPy bug
+        # repr(index.get_level_values(1))
+
     def test_repr(self, idx):
         result = idx[:1].__repr__()
         expected = """\
diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py
index 39049006edb7c..b7d7b3b459aff 100644
--- a/pandas/tests/indexes/multi/test_indexing.py
+++ b/pandas/tests/indexes/multi/test_indexing.py
@@ -498,3 +498,41 @@ def test_slice_indexer_with_missing_value(index_arr, expected, start_idx, end_id
     idx = MultiIndex.from_arrays(index_arr)
     result = idx.slice_indexer(start=start_idx, end=end_idx)
     assert result == expected
+
+
+def test_pyint_engine():
+    # GH#18519 : when combinations of codes cannot be represented in 64
+    # bits, the index underlying the MultiIndex engine works with Python
+    # integers, rather than uint64.
+    N = 5
+    keys = [
+        tuple(l)
+        for l in [
+            [0] * 10 * N,
+            [1] * 10 * N,
+            [2] * 10 * N,
+            [np.nan] * N + [2] * 9 * N,
+            [0] * N + [2] * 9 * N,
+            [np.nan] * N + [2] * 8 * N + [0] * N,
+        ]
+    ]
+    # Each level contains 4 elements (including NaN), so it is represented
+    # in 2 bits, for a total of 2*N*10 = 100 > 64 bits. If we were using a
+    # 64 bit engine and truncating the first levels, the fourth and fifth
+    # keys would collide; if truncating the last levels, the fifth and
+    # sixth; if rotating bits rather than shifting, the third and fifth.
+
+    for idx in range(len(keys)):
+        index = MultiIndex.from_tuples(keys)
+        assert index.get_loc(keys[idx]) == idx
+
+        expected = np.arange(idx + 1, dtype=np.intp)
+        result = index.get_indexer([keys[i] for i in expected])
+        tm.assert_numpy_array_equal(result, expected)
+
+    # With missing key:
+    idces = range(len(keys))
+    expected = np.array([-1] + list(idces), dtype=np.intp)
+    missing = tuple([0, 1] * 5 * N)
+    result = index.get_indexer([missing] + [keys[i] for i in idces])
+    tm.assert_numpy_array_equal(result, expected)
diff --git a/pandas/tests/indexes/multi/test_lexsort.py b/pandas/tests/indexes/multi/test_lexsort.py
new file mode 100644
index 0000000000000..1d2ad8e02697e
--- /dev/null
+++ b/pandas/tests/indexes/multi/test_lexsort.py
@@ -0,0 +1,46 @@
+from pandas import MultiIndex
+
+
+class TestIsLexsorted:
+    def test_is_lexsorted(self):
+        levels = [[0, 1], [0, 1, 2]]
+
+        index = MultiIndex(
+            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
+        )
+        assert index.is_lexsorted()
+
+        index = MultiIndex(
+            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]
+        )
+        assert not index.is_lexsorted()
+
+        index = MultiIndex(
+            levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]
+        )
+        assert not index.is_lexsorted()
+        assert index.lexsort_depth == 0
+
+
+class TestLexsortDepth:
+    def test_lexsort_depth(self):
+        # Test that lexsort_depth returns the correct sortorder
+        # when it was given to the MultiIndex constructor.
+        # GH#28518
+
+        levels = [[0, 1], [0, 1, 2]]
+
+        index = MultiIndex(
+            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
+        )
+        assert index.lexsort_depth == 2
+
+        index = MultiIndex(
+            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1
+        )
+        assert index.lexsort_depth == 1
+
+        index = MultiIndex(
+            levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0
+        )
+        assert index.lexsort_depth == 0
diff --git a/pandas/tests/indexes/multi/test_missing.py b/pandas/tests/indexes/multi/test_missing.py
index a17e1e9928bff..54ffec2e03fd3 100644
--- a/pandas/tests/indexes/multi/test_missing.py
+++ b/pandas/tests/indexes/multi/test_missing.py
@@ -141,3 +141,13 @@ def test_nan_stays_float():
     assert pd.isna(df0.index.get_level_values(1)).all()
     # the following failed in 0.14.1
     assert pd.isna(dfm.index.get_level_values(1)[:-1]).all()
+
+
+def test_tuples_have_na():
+    index = MultiIndex(
+        levels=[[1, 0], [0, 1, 2, 3]],
+        codes=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]],
+    )
+
+    assert pd.isna(index[4][0])
+    assert pd.isna(index.values[4][0])
diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py
index 2e39c714ca7af..de32bd94be491 100644
--- a/pandas/tests/indexes/multi/test_reshape.py
+++ b/pandas/tests/indexes/multi/test_reshape.py
@@ -1,5 +1,8 @@
+from datetime import datetime
+
 import numpy as np
 import pytest
+import pytz
 
 import pandas as pd
 from pandas import Index, MultiIndex
@@ -95,6 +98,53 @@ def test_append(idx):
     assert result.equals(idx)
 
 
+def test_append_index():
+    idx1 = Index([1.1, 1.2, 1.3])
+    idx2 = pd.date_range("2011-01-01", freq="D", periods=3, tz="Asia/Tokyo")
+    idx3 = Index(["A", "B", "C"])
+
+    midx_lv2 = MultiIndex.from_arrays([idx1, idx2])
+    midx_lv3 = MultiIndex.from_arrays([idx1, idx2, idx3])
+
+    result = idx1.append(midx_lv2)
+
+    # see gh-7112
+    tz = pytz.timezone("Asia/Tokyo")
+    expected_tuples = [
+        (1.1, tz.localize(datetime(2011, 1, 1))),
+        (1.2, tz.localize(datetime(2011, 1, 2))),
+        (1.3, tz.localize(datetime(2011, 1, 3))),
+    ]
+    expected = Index([1.1, 1.2, 1.3] + expected_tuples)
+    tm.assert_index_equal(result, expected)
+
+    result = midx_lv2.append(idx1)
+    expected = Index(expected_tuples + [1.1, 1.2, 1.3])
+    tm.assert_index_equal(result, expected)
+
+    result = midx_lv2.append(midx_lv2)
+    expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)])
+    tm.assert_index_equal(result, expected)
+
+    result = midx_lv2.append(midx_lv3)
+    tm.assert_index_equal(result, expected)
+
+    result = midx_lv3.append(midx_lv2)
+    expected = Index._simple_new(
+        np.array(
+            [
+                (1.1, tz.localize(datetime(2011, 1, 1)), "A"),
+                (1.2, tz.localize(datetime(2011, 1, 2)), "B"),
+                (1.3, tz.localize(datetime(2011, 1, 3)), "C"),
+            ]
+            + expected_tuples,
+            dtype=object,
+        ),
+        None,
+    )
+    tm.assert_index_equal(result, expected)
+
+
 def test_repeat():
     reps = 2
     numbers = [1, 2, 3]
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index efaedfad1e093..e3cf46b466ae4 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -6,12 +6,11 @@
 import numpy as np
 from numpy.random import randn
 import pytest
-import pytz
 
 from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype
 
 import pandas as pd
-from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, isna
+from pandas import DataFrame, Index, MultiIndex, Series, Timestamp
 import pandas._testing as tm
 
 AGG_FUNCTIONS = [
@@ -80,52 +79,6 @@ def test_append(self):
         result = a["A"].append(b["A"])
         tm.assert_series_equal(result, self.frame["A"])
 
-    def test_append_index(self):
-        idx1 = Index([1.1, 1.2, 1.3])
-        idx2 = pd.date_range("2011-01-01", freq="D", periods=3, tz="Asia/Tokyo")
-        idx3 = Index(["A", "B", "C"])
-
-        midx_lv2 = MultiIndex.from_arrays([idx1, idx2])
-        midx_lv3 = MultiIndex.from_arrays([idx1, idx2, idx3])
-
-        result = idx1.append(midx_lv2)
-
-        # see gh-7112
-        tz = pytz.timezone("Asia/Tokyo")
-        expected_tuples = [
-            (1.1, tz.localize(datetime.datetime(2011, 1, 1))),
-            (1.2, tz.localize(datetime.datetime(2011, 1, 2))),
-            (1.3, tz.localize(datetime.datetime(2011, 1, 3))),
-        ]
-        expected = Index([1.1, 1.2, 1.3] + expected_tuples)
-        tm.assert_index_equal(result, expected)
-
-        result = midx_lv2.append(idx1)
-        expected = Index(expected_tuples + [1.1, 1.2, 1.3])
-        tm.assert_index_equal(result, expected)
-
-        result = midx_lv2.append(midx_lv2)
-        expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)])
-        tm.assert_index_equal(result, expected)
-
-        result = midx_lv2.append(midx_lv3)
-        tm.assert_index_equal(result, expected)
-
-        result = midx_lv3.append(midx_lv2)
-        expected = Index._simple_new(
-            np.array(
-                [
-                    (1.1, tz.localize(datetime.datetime(2011, 1, 1)), "A"),
-                    (1.2, tz.localize(datetime.datetime(2011, 1, 2)), "B"),
-                    (1.3, tz.localize(datetime.datetime(2011, 1, 3)), "C"),
-                ]
-                + expected_tuples,
-                dtype=object,
-            ),
-            None,
-        )
-        tm.assert_index_equal(result, expected)
-
     def test_dataframe_constructor(self):
         multi = DataFrame(
             np.random.randn(4, 4),
@@ -1265,43 +1218,6 @@ def test_unstack_group_index_overflow(self):
         result = s.unstack(4)
         assert result.shape == (500, 2)
 
-    def test_pyint_engine(self):
-        # GH 18519 : when combinations of codes cannot be represented in 64
-        # bits, the index underlying the MultiIndex engine works with Python
-        # integers, rather than uint64.
-        N = 5
-        keys = [
-            tuple(l)
-            for l in [
-                [0] * 10 * N,
-                [1] * 10 * N,
-                [2] * 10 * N,
-                [np.nan] * N + [2] * 9 * N,
-                [0] * N + [2] * 9 * N,
-                [np.nan] * N + [2] * 8 * N + [0] * N,
-            ]
-        ]
-        # Each level contains 4 elements (including NaN), so it is represented
-        # in 2 bits, for a total of 2*N*10 = 100 > 64 bits. If we were using a
-        # 64 bit engine and truncating the first levels, the fourth and fifth
-        # keys would collide; if truncating the last levels, the fifth and
-        # sixth; if rotating bits rather than shifting, the third and fifth.
- - for idx in range(len(keys)): - index = MultiIndex.from_tuples(keys) - assert index.get_loc(keys[idx]) == idx - - expected = np.arange(idx + 1, dtype=np.intp) - result = index.get_indexer([keys[i] for i in expected]) - tm.assert_numpy_array_equal(result, expected) - - # With missing key: - idces = range(len(keys)) - expected = np.array([-1] + list(idces), dtype=np.intp) - missing = tuple([0, 1] * 5 * N) - result = index.get_indexer([missing] + [keys[i] for i in idces]) - tm.assert_numpy_array_equal(result, expected) - def test_to_html(self): self.ymd.columns.name = "foo" self.ymd.to_html() @@ -1545,16 +1461,6 @@ def test_drop_preserve_names(self): result = df.drop([(0, 2)]) assert result.index.names == ("one", "two") - def test_unicode_repr_issues(self): - levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])] - codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] - index = MultiIndex(levels=levels, codes=codes) - - repr(index.levels) - - # NumPy bug - # repr(index.get_level_values(1)) - def test_unicode_repr_level_names(self): index = MultiIndex.from_tuples([(0, 0), (1, 1)], names=["\u0394", "i1"]) @@ -1631,15 +1537,6 @@ def test_assign_index_sequences(self): df.index = index repr(df) - def test_tuples_have_na(self): - index = MultiIndex( - levels=[[1, 0], [0, 1, 2, 3]], - codes=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]], - ) - - assert isna(index[4][0]) - assert isna(index.values[4][0]) - def test_duplicate_groupby_issues(self): idx_tp = [ ("600809", "20061231"), @@ -1677,31 +1574,6 @@ def test_duplicate_mi(self): result = df.loc[("foo", "bar")] tm.assert_frame_equal(result, expected) - def test_duplicated_drop_duplicates(self): - # GH 4060 - idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2])) - - expected = np.array([False, False, False, True, False, False], dtype=bool) - duplicated = idx.duplicated() - tm.assert_numpy_array_equal(duplicated, expected) - assert duplicated.dtype == bool - expected = MultiIndex.from_arrays(([1, 2, 3, 2, 3], [1, 1, 1, 2, 2])) - tm.assert_index_equal(idx.drop_duplicates(), expected) - - expected = np.array([True, False, False, False, False, False]) - duplicated = idx.duplicated(keep="last") - tm.assert_numpy_array_equal(duplicated, expected) - assert duplicated.dtype == bool - expected = MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2])) - tm.assert_index_equal(idx.drop_duplicates(keep="last"), expected) - - expected = np.array([True, False, False, True, False, False]) - duplicated = idx.duplicated(keep=False) - tm.assert_numpy_array_equal(duplicated, expected) - assert duplicated.dtype == bool - expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2])) - tm.assert_index_equal(idx.drop_duplicates(keep=False), expected) - def test_multiindex_set_index(self): # segfault in #3308 d = {"t1": [2, 2.5, 3], "t2": [4, 5, 6]} @@ -1713,53 +1585,6 @@ def test_multiindex_set_index(self): # it works! 
df.set_index(index) - def test_datetimeindex(self): - idx1 = pd.DatetimeIndex( - ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, - tz="Asia/Tokyo", - ) - idx2 = pd.date_range("2010/01/01", periods=6, freq="M", tz="US/Eastern") - idx = MultiIndex.from_arrays([idx1, idx2]) - - expected1 = pd.DatetimeIndex( - ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo" - ) - - tm.assert_index_equal(idx.levels[0], expected1) - tm.assert_index_equal(idx.levels[1], idx2) - - # from datetime combos - # GH 7888 - date1 = datetime.date.today() - date2 = datetime.datetime.today() - date3 = Timestamp.today() - - for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]): - index = MultiIndex.from_product([[d1], [d2]]) - assert isinstance(index.levels[0], pd.DatetimeIndex) - assert isinstance(index.levels[1], pd.DatetimeIndex) - - def test_constructor_with_tz(self): - - index = pd.DatetimeIndex( - ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific" - ) - columns = pd.DatetimeIndex( - ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo" - ) - - result = MultiIndex.from_arrays([index, columns]) - - assert result.names == ["dt1", "dt2"] - tm.assert_index_equal(result.levels[0], index) - tm.assert_index_equal(result.levels[1], columns) - - result = MultiIndex.from_arrays([Series(index), Series(columns)]) - - assert result.names == ["dt1", "dt2"] - tm.assert_index_equal(result.levels[0], index) - tm.assert_index_equal(result.levels[1], columns) - def test_set_index_datetime(self): # GH 3950 df = DataFrame( @@ -2210,72 +2035,6 @@ def test_sort_index_categorical_multiindex(self): ) tm.assert_frame_equal(result, expected) - def test_is_lexsorted(self): - levels = [[0, 1], [0, 1, 2]] - - index = MultiIndex( - levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] - ) - assert index.is_lexsorted() - - index = MultiIndex( - levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]] - ) - assert not index.is_lexsorted() - - index = MultiIndex( - levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]] - ) - assert not index.is_lexsorted() - assert index.lexsort_depth == 0 - - def test_raise_invalid_sortorder(self): - # Test that the MultiIndex constructor raise when a incorrect sortorder is given - # Issue #28518 - - levels = [[0, 1], [0, 1, 2]] - - # Correct sortorder - MultiIndex( - levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 - ) - - with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"): - MultiIndex( - levels=levels, - codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], - sortorder=2, - ) - - with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"): - MultiIndex( - levels=levels, - codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], - sortorder=1, - ) - - def test_lexsort_depth(self): - # Test that lexsort_depth return the correct sortorder - # when it was given to the MultiIndex const. 
- # Issue #28518 - - levels = [[0, 1], [0, 1, 2]] - - index = MultiIndex( - levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 - ) - assert index.lexsort_depth == 2 - - index = MultiIndex( - levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1 - ) - assert index.lexsort_depth == 1 - - index = MultiIndex( - levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0 - ) - assert index.lexsort_depth == 0 - def test_sort_index_and_reconstruction(self): # 15622 From 4e5e73e2318a1221eb56ae7e68688278ca87ca6f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 28 Feb 2020 03:49:23 -0800 Subject: [PATCH 191/388] TST: remove invalid internals tests (#32297) --- pandas/core/internals/managers.py | 6 +++--- pandas/tests/internals/test_internals.py | 6 +----- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 329bfdf543c62..e397167e4881f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -711,16 +711,16 @@ def combine(self, blocks, copy=True): return type(self)(new_blocks, axes, do_integrity_check=False) def get_slice(self, slobj: slice, axis: int = 0): - if axis >= self.ndim: - raise IndexError("Requested axis not found in manager") if axis == 0: new_blocks = self._slice_take_blocks_ax0(slobj) - else: + elif axis == 1: _slicer = [slice(None)] * (axis + 1) _slicer[axis] = slobj slicer = tuple(_slicer) new_blocks = [blk.getitem_block(slicer) for blk in self.blocks] + else: + raise IndexError("Requested axis not found in manager") new_axes = list(self.axes) new_axes[axis] = new_axes[axis][slobj] diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 27b0500983afd..0b9d84d261708 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -748,11 +748,6 @@ class TestIndexing: create_mgr("a,b,c,d,e,f: i8", item_shape=(N,)), create_mgr("a,b: f8; c,d: i8; e,f: string", item_shape=(N,)), create_mgr("a,b: f8; c,d: i8; e,f: f8", item_shape=(N,)), - # 3-dim - create_mgr("a,b,c,d,e,f: f8", item_shape=(N, N)), - create_mgr("a,b,c,d,e,f: i8", item_shape=(N, N)), - create_mgr("a,b: f8; c,d: i8; e,f: string", item_shape=(N, N)), - create_mgr("a,b: f8; c,d: i8; e,f: f8", item_shape=(N, N)), ] @pytest.mark.parametrize("mgr", MANAGERS) @@ -775,6 +770,7 @@ def assert_slice_ok(mgr, axis, slobj): ) tm.assert_index_equal(mgr.axes[axis][slobj], sliced.axes[axis]) + assert mgr.ndim <= 2, mgr.ndim for ax in range(mgr.ndim): # slice assert_slice_ok(mgr, ax, slice(None)) From bf613c14f41b624b432d0d6b9a29007e7990d460 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Fri, 28 Feb 2020 15:40:31 +0100 Subject: [PATCH 192/388] TST: Use `sort` fixture in more places (#32292) --- .../tests/indexes/base_class/test_setops.py | 3 --- pandas/tests/indexes/common.py | 1 - pandas/tests/indexes/conftest.py | 18 +++++++++++++++ pandas/tests/indexes/datetimes/test_join.py | 1 - pandas/tests/indexes/datetimes/test_setops.py | 18 --------------- pandas/tests/indexes/interval/test_setops.py | 6 ----- pandas/tests/indexes/multi/test_setops.py | 8 ------- pandas/tests/indexes/period/test_setops.py | 6 ----- pandas/tests/indexes/ranges/test_setops.py | 2 -- pandas/tests/indexes/test_base.py | 22 ------------------- .../tests/indexes/timedeltas/test_setops.py | 7 ------ 11 files changed, 18 insertions(+), 74 deletions(-) create mode 100644 pandas/tests/indexes/conftest.py 
diff --git a/pandas/tests/indexes/base_class/test_setops.py b/pandas/tests/indexes/base_class/test_setops.py
index ec3ef8050967c..77b5e2780464d 100644
--- a/pandas/tests/indexes/base_class/test_setops.py
+++ b/pandas/tests/indexes/base_class/test_setops.py
@@ -91,7 +91,6 @@ def test_union_sort_other_incomparable_true(self):
         with pytest.raises(TypeError, match=".*"):
             idx.union(idx[:1], sort=True)
 
-    @pytest.mark.parametrize("sort", [None, False])
     def test_intersection_base(self, sort):
         # (same results for py2 and py3 but sortedness not tested elsewhere)
         index = Index([0, "a", 1, "b", 2, "c"])
@@ -103,7 +102,6 @@ def test_intersection_base(self, sort):
         tm.assert_index_equal(result, expected)
 
     @pytest.mark.parametrize("klass", [np.array, Series, list])
-    @pytest.mark.parametrize("sort", [None, False])
     def test_intersection_different_type_base(self, klass, sort):
         # GH 10149
         index = Index([0, "a", 1, "b", 2, "c"])
@@ -123,7 +121,6 @@ def test_intersection_equal_sort(self):
         tm.assert_index_equal(idx.intersection(idx, sort=False), idx)
         tm.assert_index_equal(idx.intersection(idx, sort=None), idx)
 
-    @pytest.mark.parametrize("sort", [None, False])
     def test_difference_base(self, sort):
         # (same results for py2 and py3 but sortedness not tested elsewhere)
         index = Index([0, "a", 1, "b", 2, "c"])
diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
index dca317a9eb03f..69451501fd7bd 100644
--- a/pandas/tests/indexes/common.py
+++ b/pandas/tests/indexes/common.py
@@ -512,7 +512,6 @@ def test_union_base(self, indices):
         with pytest.raises(TypeError, match=msg):
             first.union([1, 2, 3])
 
-    @pytest.mark.parametrize("sort", [None, False])
     def test_difference_base(self, sort, indices):
         first = indices[2:]
         second = indices[:4]
diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py
new file mode 100644
index 0000000000000..a9fb228073ab4
--- /dev/null
+++ b/pandas/tests/indexes/conftest.py
@@ -0,0 +1,18 @@
+import pytest
+
+
+@pytest.fixture(params=[None, False])
+def sort(request):
+    """
+    Valid values for the 'sort' parameter used in the Index
+    setops methods (intersection, union, etc.)
+
+    Caution:
+        Don't confuse this one with the "sort" fixture used
+        for DataFrame.append or concat. That one has
+        parameters [True, False].
+
+        We can't combine them as sort=True is not permitted
+        in the Index setops methods.
+ """ + return request.param diff --git a/pandas/tests/indexes/datetimes/test_join.py b/pandas/tests/indexes/datetimes/test_join.py index f2f88fd7dc90c..9a9c94fa19e6d 100644 --- a/pandas/tests/indexes/datetimes/test_join.py +++ b/pandas/tests/indexes/datetimes/test_join.py @@ -64,7 +64,6 @@ def test_join_utc_convert(self, join_type): assert isinstance(result, DatetimeIndex) assert result.tz.zone == "UTC" - @pytest.mark.parametrize("sort", [None, False]) def test_datetimeindex_union_join_empty(self, sort): dti = date_range(start="1/1/2001", end="2/1/2001", freq="D") empty = Index([]) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index ba069f5245de4..c088301097beb 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -33,7 +33,6 @@ class TestDatetimeIndexSetOps: ] # TODO: moved from test_datetimelike; dedup with version below - @pytest.mark.parametrize("sort", [None, False]) def test_union2(self, sort): everything = tm.makeDateIndex(10) first = everything[:5] @@ -42,7 +41,6 @@ def test_union2(self, sort): tm.assert_index_equal(union, everything) @pytest.mark.parametrize("box", [np.array, Series, list]) - @pytest.mark.parametrize("sort", [None, False]) def test_union3(self, sort, box): everything = tm.makeDateIndex(10) first = everything[:5] @@ -57,7 +55,6 @@ def test_union3(self, sort, box): tm.assert_index_equal(result, expected) @pytest.mark.parametrize("tz", tz) - @pytest.mark.parametrize("sort", [None, False]) def test_union(self, tz, sort): rng1 = pd.date_range("1/1/2000", freq="D", periods=5, tz=tz) other1 = pd.date_range("1/6/2000", freq="D", periods=5, tz=tz) @@ -89,7 +86,6 @@ def test_union(self, tz, sort): else: tm.assert_index_equal(result_union, exp_notsorted) - @pytest.mark.parametrize("sort", [None, False]) def test_union_coverage(self, sort): idx = DatetimeIndex(["2000-01-03", "2000-01-01", "2000-01-02"]) ordered = DatetimeIndex(idx.sort_values(), freq="infer") @@ -100,7 +96,6 @@ def test_union_coverage(self, sort): tm.assert_index_equal(result, ordered) assert result.freq == ordered.freq - @pytest.mark.parametrize("sort", [None, False]) def test_union_bug_1730(self, sort): rng_a = date_range("1/1/2012", periods=4, freq="3H") rng_b = date_range("1/1/2012", periods=4, freq="4H") @@ -113,7 +108,6 @@ def test_union_bug_1730(self, sort): exp = DatetimeIndex(exp) tm.assert_index_equal(result, exp) - @pytest.mark.parametrize("sort", [None, False]) def test_union_bug_1745(self, sort): left = DatetimeIndex(["2012-05-11 15:19:49.695000"]) right = DatetimeIndex( @@ -137,7 +131,6 @@ def test_union_bug_1745(self, sort): exp = exp.sort_values() tm.assert_index_equal(result, exp) - @pytest.mark.parametrize("sort", [None, False]) def test_union_bug_4564(self, sort): from pandas import DateOffset @@ -152,7 +145,6 @@ def test_union_bug_4564(self, sort): exp = DatetimeIndex(exp) tm.assert_index_equal(result, exp) - @pytest.mark.parametrize("sort", [None, False]) def test_union_freq_both_none(self, sort): # GH11086 expected = bdate_range("20150101", periods=10) @@ -188,7 +180,6 @@ def test_union_dataframe_index(self): exp = pd.date_range("1/1/1980", "1/1/2012", freq="MS") tm.assert_index_equal(df.index, exp) - @pytest.mark.parametrize("sort", [None, False]) def test_union_with_DatetimeIndex(self, sort): i1 = Int64Index(np.arange(0, 20, 2)) i2 = date_range(start="2012-01-03 00:00:00", periods=10, freq="D") @@ -218,7 +209,6 @@ def test_intersection2(self): 
@pytest.mark.parametrize( "tz", [None, "Asia/Tokyo", "US/Eastern", "dateutil/US/Pacific"] ) - @pytest.mark.parametrize("sort", [None, False]) def test_intersection(self, tz, sort): # GH 4690 (with tz) base = date_range("6/1/2000", "6/30/2000", freq="D", name="idx") @@ -298,7 +288,6 @@ def test_intersection_bug_1708(self): assert len(result) == 0 @pytest.mark.parametrize("tz", tz) - @pytest.mark.parametrize("sort", [None, False]) def test_difference(self, tz, sort): rng_dates = ["1/2/2000", "1/3/2000", "1/1/2000", "1/4/2000", "1/5/2000"] @@ -324,7 +313,6 @@ def test_difference(self, tz, sort): expected = expected.sort_values() tm.assert_index_equal(result_diff, expected) - @pytest.mark.parametrize("sort", [None, False]) def test_difference_freq(self, sort): # GH14323: difference of DatetimeIndex should not preserve frequency @@ -341,7 +329,6 @@ def test_difference_freq(self, sort): tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal("freq", idx_diff, expected) - @pytest.mark.parametrize("sort", [None, False]) def test_datetimeindex_diff(self, sort): dti1 = date_range(freq="Q-JAN", start=datetime(1997, 12, 31), periods=100) dti2 = date_range(freq="Q-JAN", start=datetime(1997, 12, 31), periods=98) @@ -387,7 +374,6 @@ class TestBusinessDatetimeIndex: def setup_method(self, method): self.rng = bdate_range(START, END) - @pytest.mark.parametrize("sort", [None, False]) def test_union(self, sort): # overlapping left = self.rng[:10] @@ -423,7 +409,6 @@ def test_union(self, sort): the_union = self.rng.union(rng, sort=sort) assert isinstance(the_union, DatetimeIndex) - @pytest.mark.parametrize("sort", [None, False]) def test_union_not_cacheable(self, sort): rng = date_range("1/1/2000", periods=50, freq=Minute()) rng1 = rng[10:] @@ -466,7 +451,6 @@ def test_intersection_bug(self): result = a.intersection(b) tm.assert_index_equal(result, b) - @pytest.mark.parametrize("sort", [None, False]) def test_month_range_union_tz_pytz(self, sort): from pytz import timezone @@ -484,7 +468,6 @@ def test_month_range_union_tz_pytz(self, sort): early_dr.union(late_dr, sort=sort) @td.skip_if_windows_python_3 - @pytest.mark.parametrize("sort", [None, False]) def test_month_range_union_tz_dateutil(self, sort): from pandas._libs.tslibs.timezones import dateutil_gettz @@ -506,7 +489,6 @@ class TestCustomDatetimeIndex: def setup_method(self, method): self.rng = bdate_range(START, END, freq="C") - @pytest.mark.parametrize("sort", [None, False]) def test_union(self, sort): # overlapping left = self.rng[:10] diff --git a/pandas/tests/indexes/interval/test_setops.py b/pandas/tests/indexes/interval/test_setops.py index d9359d717de1d..e3e5070064aff 100644 --- a/pandas/tests/indexes/interval/test_setops.py +++ b/pandas/tests/indexes/interval/test_setops.py @@ -10,11 +10,6 @@ def name(request): return request.param -@pytest.fixture(params=[None, False]) -def sort(request): - return request.param - - def monotonic_index(start, end, dtype="int64", closed="right"): return IntervalIndex.from_breaks(np.arange(start, end, dtype=dtype), closed=closed) @@ -153,7 +148,6 @@ def test_symmetric_difference(self, closed, sort): @pytest.mark.parametrize( "op_name", ["union", "intersection", "difference", "symmetric_difference"] ) - @pytest.mark.parametrize("sort", [None, False]) def test_set_incompatible_types(self, closed, op_name, sort): index = monotonic_index(0, 11, closed=closed) set_op = getattr(index, op_name) diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 
627127f7b5b53..d7d0ff4c411aa 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -7,7 +7,6 @@ @pytest.mark.parametrize("case", [0.5, "xxx"]) -@pytest.mark.parametrize("sort", [None, False]) @pytest.mark.parametrize( "method", ["intersection", "union", "difference", "symmetric_difference"] ) @@ -18,7 +17,6 @@ def test_set_ops_error_cases(idx, case, sort, method): getattr(idx, method)(case, sort=sort) -@pytest.mark.parametrize("sort", [None, False]) @pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list]) def test_intersection_base(idx, sort, klass): first = idx[2::-1] # first 3 elements reversed @@ -39,7 +37,6 @@ def test_intersection_base(idx, sort, klass): first.intersection([1, 2, 3], sort=sort) -@pytest.mark.parametrize("sort", [None, False]) @pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list]) def test_union_base(idx, sort, klass): first = idx[::-1] @@ -60,7 +57,6 @@ def test_union_base(idx, sort, klass): first.union([1, 2, 3], sort=sort) -@pytest.mark.parametrize("sort", [None, False]) def test_difference_base(idx, sort): second = idx[4:] answer = idx[:4] @@ -83,7 +79,6 @@ def test_difference_base(idx, sort): idx.difference([1, 2, 3], sort=sort) -@pytest.mark.parametrize("sort", [None, False]) def test_symmetric_difference(idx, sort): first = idx[1:] second = idx[:-1] @@ -123,7 +118,6 @@ def test_empty(idx): assert idx[:0].empty -@pytest.mark.parametrize("sort", [None, False]) def test_difference(idx, sort): first = idx @@ -234,7 +228,6 @@ def test_difference_sort_incomparable_true(): idx.difference(other, sort=True) -@pytest.mark.parametrize("sort", [None, False]) def test_union(idx, sort): piece1 = idx[:5][::-1] piece2 = idx[3:] @@ -270,7 +263,6 @@ def test_union(idx, sort): # assert result.equals(result2) -@pytest.mark.parametrize("sort", [None, False]) def test_intersection(idx, sort): piece1 = idx[:5][::-1] piece2 = idx[3:] diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index 647d56d33f312..88f1687b8bb10 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -13,7 +13,6 @@ def _permute(obj): class TestPeriodIndex: - @pytest.mark.parametrize("sort", [None, False]) def test_union(self, sort): # union other1 = period_range("1/1/2000", freq="D", periods=5) @@ -134,7 +133,6 @@ def test_union(self, sort): expected = expected.sort_values() tm.assert_index_equal(result_union, expected) - @pytest.mark.parametrize("sort", [None, False]) def test_union_misc(self, sort): index = period_range("1/1/2000", "1/20/2000", freq="D") @@ -165,7 +163,6 @@ def test_union_dataframe_index(self): exp = period_range("1/1/1980", "1/1/2012", freq="M") tm.assert_index_equal(df.index, exp) - @pytest.mark.parametrize("sort", [None, False]) def test_intersection(self, sort): index = period_range("1/1/2000", "1/20/2000", freq="D") @@ -190,7 +187,6 @@ def test_intersection(self, sort): with pytest.raises(IncompatibleFrequency): index.intersection(index3, sort=sort) - @pytest.mark.parametrize("sort", [None, False]) def test_intersection_cases(self, sort): base = period_range("6/1/2000", "6/30/2000", freq="D", name="idx") @@ -259,7 +255,6 @@ def test_intersection_cases(self, sort): result = rng.intersection(rng[0:0]) assert len(result) == 0 - @pytest.mark.parametrize("sort", [None, False]) def test_difference(self, sort): # diff period_rng = ["1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"] @@ -324,7 +319,6 @@ def 
test_difference(self, sort): expected = expected.sort_values() tm.assert_index_equal(result_difference, expected) - @pytest.mark.parametrize("sort", [None, False]) def test_difference_freq(self, sort): # GH14323: difference of Period MUST preserve frequency # but the ability to union results must be preserved diff --git a/pandas/tests/indexes/ranges/test_setops.py b/pandas/tests/indexes/ranges/test_setops.py index 8e749e0752087..5b565310cfb9c 100644 --- a/pandas/tests/indexes/ranges/test_setops.py +++ b/pandas/tests/indexes/ranges/test_setops.py @@ -8,7 +8,6 @@ class TestRangeIndexSetOps: - @pytest.mark.parametrize("sort", [None, False]) def test_intersection(self, sort): # intersect with Int64Index index = RangeIndex(start=0, stop=20, step=2) @@ -79,7 +78,6 @@ def test_intersection(self, sort): expected = RangeIndex(0, 0, 1) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [False, None]) def test_union_noncomparable(self, sort): # corner case, non-Int64Index index = RangeIndex(start=0, stop=20, step=2) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 22f6af2af4aed..0c4a790646a81 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -681,7 +681,6 @@ def test_empty_fancy_raises(self, index): with pytest.raises(IndexError, match=msg): index[empty_farr] - @pytest.mark.parametrize("sort", [None, False]) def test_intersection(self, index, sort): first = index[:20] second = index[:10] @@ -702,7 +701,6 @@ def test_intersection(self, index, sort): (Index([3, 4, 5, 6, 7]), False), ], ) - @pytest.mark.parametrize("sort", [None, False]) def test_intersection_name_preservation(self, index2, keeps_name, sort): index1 = Index([1, 2, 3, 4, 5], name="index") expected = Index([3, 4, 5]) @@ -718,7 +716,6 @@ def test_intersection_name_preservation(self, index2, keeps_name, sort): "first_name,second_name,expected_name", [("A", "A", "A"), ("A", "B", None), (None, "B", None)], ) - @pytest.mark.parametrize("sort", [None, False]) def test_intersection_name_preservation2( self, index, first_name, second_name, expected_name, sort ): @@ -736,7 +733,6 @@ def test_intersection_name_preservation2( (Index([4, 7, 6, 5, 3], name="other"), False), ], ) - @pytest.mark.parametrize("sort", [None, False]) def test_intersection_monotonic(self, index2, keeps_name, sort): index1 = Index([5, 3, 2, 4, 1], name="index") expected = Index([5, 3, 4]) @@ -753,7 +749,6 @@ def test_intersection_monotonic(self, index2, keeps_name, sort): "index2,expected_arr", [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B", "A"])], ) - @pytest.mark.parametrize("sort", [None, False]) def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort): # non-monotonic non-unique index1 = Index(["A", "B", "A", "C"]) @@ -763,7 +758,6 @@ def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort) expected = expected.sort_values() tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [None, False]) def test_intersect_str_dates(self, sort): dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] @@ -780,7 +774,6 @@ def test_intersection_equal_sort_true(self): sorted_ = pd.Index(["a", "b", "c"]) tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) - @pytest.mark.parametrize("sort", [None, False]) def test_chained_union(self, sort): # Chained unions handles names correctly i1 = Index([1, 2], name="i1") @@ -797,7 +790,6 @@ def test_chained_union(self, sort): expected = j1.union(j2, 
sort=sort).union(j3, sort=sort) tm.assert_index_equal(union, expected) - @pytest.mark.parametrize("sort", [None, False]) def test_union(self, index, sort): first = index[5:20] second = index[:10] @@ -835,7 +827,6 @@ def test_union_sort_special_true(self, slice_): tm.assert_index_equal(result, expected) @pytest.mark.parametrize("klass", [np.array, Series, list]) - @pytest.mark.parametrize("sort", [None, False]) def test_union_from_iterables(self, index, klass, sort): # GH 10149 first = index[5:20] @@ -848,7 +839,6 @@ def test_union_from_iterables(self, index, klass, sort): tm.assert_index_equal(result, everything.sort_values()) assert tm.equalContents(result, everything) - @pytest.mark.parametrize("sort", [None, False]) def test_union_identity(self, index, sort): first = index[5:20] @@ -870,7 +860,6 @@ def test_union_identity(self, index, sort): "first_name, second_name, expected_name", [("A", "B", None), (None, "B", None), ("A", None, None)], ) - @pytest.mark.parametrize("sort", [None, False]) def test_union_name_preservation( self, first_list, second_list, first_name, second_name, expected_name, sort ): @@ -887,7 +876,6 @@ def test_union_name_preservation( expected = Index(vals, name=expected_name) assert tm.equalContents(union, expected) - @pytest.mark.parametrize("sort", [None, False]) def test_union_dt_as_obj(self, sort): # TODO: Replace with fixturesult index = self.create_index() @@ -1022,7 +1010,6 @@ def test_append_empty_preserve_name(self, name, expected): assert result.name == expected @pytest.mark.parametrize("second_name,expected", [(None, None), ("name", "name")]) - @pytest.mark.parametrize("sort", [None, False]) def test_difference_name_preservation(self, index, second_name, expected, sort): first = index[5:20] second = index[:10] @@ -1039,7 +1026,6 @@ def test_difference_name_preservation(self, index, second_name, expected, sort): else: assert result.name == expected - @pytest.mark.parametrize("sort", [None, False]) def test_difference_empty_arg(self, index, sort): first = index[5:20] first.name == "name" @@ -1048,7 +1034,6 @@ def test_difference_empty_arg(self, index, sort): assert tm.equalContents(result, first) assert result.name == first.name - @pytest.mark.parametrize("sort", [None, False]) def test_difference_identity(self, index, sort): first = index[5:20] first.name == "name" @@ -1057,7 +1042,6 @@ def test_difference_identity(self, index, sort): assert len(result) == 0 assert result.name == first.name - @pytest.mark.parametrize("sort", [None, False]) def test_difference_sort(self, index, sort): first = index[5:20] second = index[:10] @@ -1070,7 +1054,6 @@ def test_difference_sort(self, index, sort): tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [None, False]) def test_symmetric_difference(self, sort): # smoke index1 = Index([5, 2, 3, 4], name="index1") @@ -1118,7 +1101,6 @@ def test_difference_incomparable_true(self, opname): with pytest.raises(TypeError, match="Cannot compare"): op(a) - @pytest.mark.parametrize("sort", [None, False]) def test_symmetric_difference_mi(self, sort): index1 = MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])) index2 = MultiIndex.from_tuples([("foo", 1), ("bar", 3)]) @@ -1136,7 +1118,6 @@ def test_symmetric_difference_mi(self, sort): (Index([0, 1]), Index([np.nan, 2.0, 3.0, 0.0])), ], ) - @pytest.mark.parametrize("sort", [None, False]) def test_symmetric_difference_missing(self, index2, expected, sort): # GH 13514 change: {nan} - {nan} == {} # (GH 6444, sorting of nans, is no longer an issue) @@ 
-1147,7 +1128,6 @@ def test_symmetric_difference_missing(self, index2, expected, sort): expected = expected.sort_values() tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [None, False]) def test_symmetric_difference_non_index(self, sort): index1 = Index([1, 2, 3, 4], name="index1") index2 = np.array([2, 3, 4, 5]) @@ -1160,7 +1140,6 @@ def test_symmetric_difference_non_index(self, sort): assert tm.equalContents(result, expected) assert result.name == "new_name" - @pytest.mark.parametrize("sort", [None, False]) def test_difference_type(self, indices, sort): # GH 20040 # If taking difference of a set and itself, it @@ -1171,7 +1150,6 @@ def test_difference_type(self, indices, sort): expected = indices.drop(indices) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [None, False]) def test_intersection_difference(self, indices, sort): # GH 20040 # Test that the intersection of an index with an diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py index 0aa784cbb7710..4808950f17b52 100644 --- a/pandas/tests/indexes/timedeltas/test_setops.py +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -107,7 +107,6 @@ def test_intersection_bug_1708(self): expected = timedelta_range("1 day 01:00:00", periods=3, freq="h") tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [None, False]) def test_intersection_equal(self, sort): # GH 24471 Test intersection outcome given the sort keyword # for equal indicies intersection should return the original index @@ -123,7 +122,6 @@ def test_intersection_equal(self, sort): assert inter is first @pytest.mark.parametrize("period_1, period_2", [(0, 4), (4, 0)]) - @pytest.mark.parametrize("sort", [None, False]) def test_intersection_zero_length(self, period_1, period_2, sort): # GH 24471 test for non overlap the intersection should be zero length index_1 = timedelta_range("1 day", periods=period_1, freq="h") @@ -132,7 +130,6 @@ def test_intersection_zero_length(self, period_1, period_2, sort): result = index_1.intersection(index_2, sort=sort) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [None, False]) def test_zero_length_input_index(self, sort): # GH 24966 test for 0-len intersections are copied index_1 = timedelta_range("1 day", periods=0, freq="h") @@ -162,7 +159,6 @@ def test_zero_length_input_index(self, sort): ), ], ) - @pytest.mark.parametrize("sort", [None, False]) def test_intersection(self, rng, expected, sort): # GH 4690 (with tz) base = timedelta_range("1 day", periods=4, freq="h", name="idx") @@ -195,7 +191,6 @@ def test_intersection(self, rng, expected, sort): ), ], ) - @pytest.mark.parametrize("sort", [None, False]) def test_intersection_non_monotonic(self, rng, expected, sort): # 24471 non-monotonic base = TimedeltaIndex(["1 hour", "2 hour", "4 hour", "3 hour"], name="idx") @@ -213,7 +208,6 @@ def test_intersection_non_monotonic(self, rng, expected, sort): class TestTimedeltaIndexDifference: - @pytest.mark.parametrize("sort", [None, False]) def test_difference_freq(self, sort): # GH14323: Difference of TimedeltaIndex should not preserve frequency @@ -231,7 +225,6 @@ def test_difference_freq(self, sort): tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal("freq", idx_diff, expected) - @pytest.mark.parametrize("sort", [None, False]) def test_difference_sort(self, sort): index = pd.TimedeltaIndex( From 26de103e220f477ff3a91fdc5a086236bb6bc26a Mon Sep 17 00:00:00 2001 From: MomIsBestFriend 
 <50263213+MomIsBestFriend@users.noreply.github.com>
Date: Fri, 28 Feb 2020 17:40:28 +0200
Subject: [PATCH 193/388] CLN: Removed unused variable definitions (#32328)

---
 pandas/_libs/index.pyx | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index 6141e2b78e9f4..0ba5cb7e9bc40 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -115,8 +115,6 @@ cdef class IndexEngine:
     cdef _maybe_get_bool_indexer(self, object val):
         cdef:
             ndarray[uint8_t, ndim=1, cast=True] indexer
-            ndarray[intp_t, ndim=1] found
-            int count
 
         indexer = self._get_index_values() == val
         return self._unpack_bool_indexer(indexer, val)

From ece05171cc68c0f66765b5a1d4eda7e63fea6a52 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend
 <50263213+MomIsBestFriend@users.noreply.github.com>
Date: Fri, 28 Feb 2020 18:16:46 +0200
Subject: [PATCH 194/388] CLN: some code cleanups (#32177)

---
 pandas/_libs/hashtable.pyx         |   6 +
 pandas/_libs/internals.pyx         |   6 +-
 pandas/_libs/interval.pyx          |   3 +-
 pandas/_libs/join.pyx              |  28 +++--
 pandas/_libs/tslib.pyx             | 189 +++++++++++++++++++----------
 pandas/_libs/tslibs/fields.pyx     |  27 +++--
 pandas/_libs/tslibs/nattype.pyx    |   4 +-
 pandas/_libs/tslibs/timestamps.pyx |   7 +-
 8 files changed, 169 insertions(+), 101 deletions(-)

diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx
index 884db9ee931d4..e80f134290a7e 100644
--- a/pandas/_libs/hashtable.pyx
+++ b/pandas/_libs/hashtable.pyx
@@ -86,7 +86,10 @@ cdef class Factorizer:
         self, ndarray[object] values, sort=False, na_sentinel=-1, na_value=None
     ):
         """
+        Examples
+        --------
         Factorize values with nans replaced by na_sentinel
+
         >>> factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20)
         array([ 0,  1, 20])
         """
@@ -131,7 +134,10 @@ cdef class Int64Factorizer:
     def factorize(self, const int64_t[:] values, sort=False,
                   na_sentinel=-1, na_value=None):
         """
+        Examples
+        --------
         Factorize values with nans replaced by na_sentinel
+
         >>> factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20)
         array([ 0,  1, 20])
         """
diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index 8bbbc6db94842..437406cbbd819 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -105,8 +105,7 @@ cdef class BlockPlacement:
             Py_ssize_t start, stop, end, _
 
         if not self._has_array:
            start, stop, step, _ = slice_get_indices_ex(self._as_slice)
-            self._as_array = np.arange(start, stop, step,
-                                       dtype=np.int64)
+            self._as_array = np.arange(start, stop, step, dtype=np.int64)
             self._has_array = True
         return self._as_array
@@ -283,8 +282,7 @@ cdef slice_getitem(slice slc, ind):
     s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc)
 
     if isinstance(ind, slice):
-        ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind,
-                                                                      s_len)
+        ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind, s_len)
 
         if ind_step > 0 and ind_len == s_len:
             # short-cut for no-op slice
diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx
index 1166768472449..55999f2d6fd74 100644
--- a/pandas/_libs/interval.pyx
+++ b/pandas/_libs/interval.pyx
@@ -481,8 +481,7 @@ cdef class Interval(IntervalMixin):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def intervals_to_interval_bounds(ndarray intervals,
-                                 bint validate_closed=True):
+def intervals_to_interval_bounds(ndarray intervals, bint validate_closed=True):
     """
     Parameters
     ----------
diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx
index f696591cf3bd1..cbe0e71153565 100644
--- a/pandas/_libs/join.pyx
+++ b/pandas/_libs/join.pyx
@@ -817,18 +817,22 @@ def asof_join_nearest_on_X_by_Y(asof_t[:] left_values,
-817,18 +817,22 @@ def asof_join_nearest_on_X_by_Y(asof_t[:] left_values, right_indexer = np.empty(left_size, dtype=np.int64) # search both forward and backward - bli, bri = asof_join_backward_on_X_by_Y(left_values, - right_values, - left_by_values, - right_by_values, - allow_exact_matches, - tolerance) - fli, fri = asof_join_forward_on_X_by_Y(left_values, - right_values, - left_by_values, - right_by_values, - allow_exact_matches, - tolerance) + bli, bri = asof_join_backward_on_X_by_Y( + left_values, + right_values, + left_by_values, + right_by_values, + allow_exact_matches, + tolerance, + ) + fli, fri = asof_join_forward_on_X_by_Y( + left_values, + right_values, + left_by_values, + right_by_values, + allow_exact_matches, + tolerance, + ) for i in range(len(bri)): # choose timestamp from right with smaller difference diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index a176c4e41e834..b78b623bfa187 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,27 +1,44 @@ import cython -from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, - PyDateTime_IMPORT, - timedelta, datetime, date, time) +from cpython.datetime cimport ( + PyDate_Check, + PyDateTime_Check, + PyDateTime_IMPORT, + date, + datetime, + time, + timedelta, +) # import datetime C API PyDateTime_IMPORT cimport numpy as cnp -from numpy cimport int64_t, ndarray, float64_t +from numpy cimport float64_t, int64_t, ndarray import numpy as np cnp.import_array() import pytz from pandas._libs.util cimport ( - is_integer_object, is_float_object, is_datetime64_object) + is_datetime64_object, + is_float_object, + is_integer_object, +) from pandas._libs.tslibs.c_timestamp cimport _Timestamp from pandas._libs.tslibs.np_datetime cimport ( - check_dts_bounds, npy_datetimestruct, _string_to_dts, dt64_to_dtstruct, - dtstruct_to_dt64, pydatetime_to_dt64, pydate_to_dt64, get_datetime64_value) + _string_to_dts, + check_dts_bounds, + dt64_to_dtstruct, + dtstruct_to_dt64, + get_datetime64_value, + npy_datetimestruct, + pydate_to_dt64, + pydatetime_to_dt64, +) + from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.parsing import parse_datetime_string @@ -44,45 +61,71 @@ from pandas._libs.tslibs.timestamps cimport create_timestamp_from_ts from pandas._libs.tslibs.timestamps import Timestamp from pandas._libs.tslibs.tzconversion cimport ( - tz_convert_single, tz_convert_utc_to_tzlocal) + tz_convert_single, + tz_convert_utc_to_tzlocal, +) cdef inline object create_datetime_from_ts( - int64_t value, npy_datetimestruct dts, - object tz, object freq, bint fold): - """ convenience routine to construct a datetime.datetime from its parts """ - return datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz, fold=fold) + int64_t value, + npy_datetimestruct dts, + object tz, + object freq, + bint fold +): + """ + Convenience routine to construct a datetime.datetime from its parts. + """ + return datetime( + dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz, fold=fold + ) cdef inline object create_date_from_ts( - int64_t value, npy_datetimestruct dts, - object tz, object freq, bint fold): - """ convenience routine to construct a datetime.date from its parts """ + int64_t value, + npy_datetimestruct dts, + object tz, + object freq, + bint fold +): + """ + Convenience routine to construct a datetime.date from its parts. 
+ """ # GH 25057 add fold argument to match other func_create signatures return date(dts.year, dts.month, dts.day) cdef inline object create_time_from_ts( - int64_t value, npy_datetimestruct dts, - object tz, object freq, bint fold): - """ convenience routine to construct a datetime.time from its parts """ + int64_t value, + npy_datetimestruct dts, + object tz, + object freq, + bint fold +): + """ + Convenience routine to construct a datetime.time from its parts. + """ return time(dts.hour, dts.min, dts.sec, dts.us, tz, fold=fold) @cython.wraparound(False) @cython.boundscheck(False) -def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, - bint fold=0, str box="datetime"): +def ints_to_pydatetime( + const int64_t[:] arr, + object tz=None, + object freq=None, + bint fold=0, + str box="datetime" +): """ - Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp + Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp. Parameters ---------- - arr : array of i8 - tz : str, default None + arr : array of i8 + tz : str, optional convert to this timezone - freq : str/Offset, default None + freq : str/Offset, optional freq to convert fold : bint, default is 0 Due to daylight saving time, one wall clock time can occur twice @@ -91,17 +134,16 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, the wall clock hits the ambiguous time .. versionadded:: 1.1.0 - box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime' - If datetime, convert to datetime.datetime - If date, convert to datetime.date - If time, convert to datetime.time - If Timestamp, convert to pandas.Timestamp + box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime' + * If datetime, convert to datetime.datetime + * If date, convert to datetime.date + * If time, convert to datetime.time + * If Timestamp, convert to pandas.Timestamp Returns ------- - result : array of dtype specified by box + ndarray of dtype specified by box """ - cdef: Py_ssize_t i, n = len(arr) ndarray[int64_t] trans @@ -224,8 +266,12 @@ def _test_parse_iso8601(ts: str): @cython.wraparound(False) @cython.boundscheck(False) -def format_array_from_datetime(ndarray[int64_t] values, object tz=None, - object format=None, object na_rep=None): +def format_array_from_datetime( + ndarray[int64_t] values, + object tz=None, + object format=None, + object na_rep=None +): """ return a np object array of the string formatted values @@ -303,8 +349,12 @@ def format_array_from_datetime(ndarray[int64_t] values, object tz=None, return result -def array_with_unit_to_datetime(ndarray values, ndarray mask, object unit, - str errors='coerce'): +def array_with_unit_to_datetime( + ndarray values, + ndarray mask, + object unit, + str errors='coerce' +): """ Convert the ndarray to datetime according to the time unit. @@ -322,14 +372,13 @@ def array_with_unit_to_datetime(ndarray values, ndarray mask, object unit, Parameters ---------- values : ndarray of object - Date-like objects to convert - mask : ndarray of bool - Not-a-time mask for non-nullable integer types conversion, - can be None + Date-like objects to convert. + mask : boolean ndarray + Not-a-time mask for non-nullable integer types conversion, can be None. unit : object - Time unit to use during conversion + Time unit to use during conversion. errors : str, default 'raise' - Error behavior when parsing + Error behavior when parsing. 
Returns ------- @@ -382,8 +431,7 @@ def array_with_unit_to_datetime(ndarray values, ndarray mask, object unit, if ((fvalues < Timestamp.min.value).any() or (fvalues > Timestamp.max.value).any()): - raise OutOfBoundsDatetime(f"cannot convert input with unit " - f"'{unit}'") + raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") result = (iresult * m).astype('M8[ns]') iresult = result.view('i8') iresult[mask] = NPY_NAT @@ -409,8 +457,8 @@ def array_with_unit_to_datetime(ndarray values, ndarray mask, object unit, except OverflowError: if is_raise: raise OutOfBoundsDatetime( - f"cannot convert input {val} with the unit " - f"'{unit}'") + f"cannot convert input {val} with the unit '{unit}'" + ) elif is_ignore: raise AssertionError iresult[i] = NPY_NAT @@ -425,16 +473,16 @@ def array_with_unit_to_datetime(ndarray values, ndarray mask, object unit, except ValueError: if is_raise: raise ValueError( - f"non convertible value {val} with the unit " - f"'{unit}'") + f"non convertible value {val} with the unit '{unit}'" + ) elif is_ignore: raise AssertionError iresult[i] = NPY_NAT except OverflowError: if is_raise: raise OutOfBoundsDatetime( - f"cannot convert input {val} with the unit " - f"'{unit}'") + f"cannot convert input {val} with the unit '{unit}'" + ) elif is_ignore: raise AssertionError iresult[i] = NPY_NAT @@ -442,8 +490,9 @@ def array_with_unit_to_datetime(ndarray values, ndarray mask, object unit, else: if is_raise: - raise ValueError(f"unit='{unit}' not valid with non-numerical " - f"val='{val}'") + raise ValueError( + f"unit='{unit}' not valid with non-numerical val='{val}'" + ) if is_ignore: raise AssertionError @@ -486,9 +535,14 @@ def array_with_unit_to_datetime(ndarray values, ndarray mask, object unit, @cython.wraparound(False) @cython.boundscheck(False) -cpdef array_to_datetime(ndarray[object] values, str errors='raise', - bint dayfirst=False, bint yearfirst=False, - object utc=None, bint require_iso8601=False): +cpdef array_to_datetime( + ndarray[object] values, + str errors='raise', + bint dayfirst=False, + bint yearfirst=False, + object utc=None, + bint require_iso8601=False +): """ Converts a 1D array of date-like values to a numpy array of either: 1) datetime64[ns] data @@ -625,8 +679,9 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', iresult[i] = NPY_NAT continue elif is_raise: - raise ValueError(f"time data {val} doesn't " - f"match format specified") + raise ValueError( + f"time data {val} doesn't match format specified" + ) return values, tz_out try: @@ -641,8 +696,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', if is_coerce: iresult[i] = NPY_NAT continue - raise TypeError("invalid string coercion to " - "datetime") + raise TypeError("invalid string coercion to datetime") if tz is not None: seen_datetime_offset = 1 @@ -708,8 +762,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', return ignore_errors_out_of_bounds_fallback(values), tz_out except TypeError: - return array_to_datetime_object(values, errors, - dayfirst, yearfirst) + return array_to_datetime_object(values, errors, dayfirst, yearfirst) if seen_datetime and seen_integer: # we have mixed datetimes & integers @@ -724,8 +777,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', elif is_raise: raise ValueError("mixed datetimes and integers in passed array") else: - return array_to_datetime_object(values, errors, - dayfirst, yearfirst) + return array_to_datetime_object(values, errors, dayfirst, yearfirst) if 
seen_datetime_offset and not utc_convert: # GH#17697 @@ -736,8 +788,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', # (with individual dateutil.tzoffsets) are returned is_same_offsets = len(out_tzoffset_vals) == 1 if not is_same_offsets: - return array_to_datetime_object(values, errors, - dayfirst, yearfirst) + return array_to_datetime_object(values, errors, dayfirst, yearfirst) else: tz_offset = out_tzoffset_vals.pop() tz_out = pytz.FixedOffset(tz_offset / 60.) @@ -784,8 +835,12 @@ cdef ignore_errors_out_of_bounds_fallback(ndarray[object] values): @cython.wraparound(False) @cython.boundscheck(False) -cdef array_to_datetime_object(ndarray[object] values, str errors, - bint dayfirst=False, bint yearfirst=False): +cdef array_to_datetime_object( + ndarray[object] values, + str errors, + bint dayfirst=False, + bint yearfirst=False +): """ Fall back function for array_to_datetime diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 8bee7da6231ba..50b7fba67e78f 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -38,7 +38,7 @@ def get_time_micros(const int64_t[:] dtindex): cdef: ndarray[int64_t] micros - micros = np.mod(dtindex, DAY_SECONDS * 1000000000, dtype=np.int64) + micros = np.mod(dtindex, DAY_SECONDS * 1_000_000_000, dtype=np.int64) micros //= 1000 return micros @@ -54,13 +54,15 @@ def build_field_sarray(const int64_t[:] dtindex): npy_datetimestruct dts ndarray[int32_t] years, months, days, hours, minutes, seconds, mus - sa_dtype = [('Y', 'i4'), # year - ('M', 'i4'), # month - ('D', 'i4'), # day - ('h', 'i4'), # hour - ('m', 'i4'), # min - ('s', 'i4'), # second - ('u', 'i4')] # microsecond + sa_dtype = [ + ("Y", "i4"), # year + ("M", "i4"), # month + ("D", "i4"), # day + ("h", "i4"), # hour + ("m", "i4"), # min + ("s", "i4"), # second + ("u", "i4"), # microsecond + ] out = np.empty(count, dtype=sa_dtype) @@ -157,9 +159,12 @@ def get_start_end_field(const int64_t[:] dtindex, object field, int mo_off, dom, doy, dow, ldom _month_offset = np.array( - [[0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365], - [0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366]], - dtype=np.int32) + [ + [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365], + [0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366], + ], + dtype=np.int32, + ) out = np.zeros(count, dtype='int8') diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 68a25d0cc481a..7fec4ba5e7d25 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -764,7 +764,9 @@ NaT = c_NaT # Python-visible # ---------------------------------------------------------------------- cdef inline bint checknull_with_nat(object val): - """ utility to check if a value is a nat or not """ + """ + Utility to check if a value is a nat or not. + """ return val is None or util.is_nan(val) or val is c_NaT or val is C_NA diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 5cd3467eed042..64b79200028b6 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1090,11 +1090,10 @@ default 'raise' def normalize(self): """ - Normalize Timestamp to midnight, preserving - tz information. + Normalize Timestamp to midnight, preserving tz information. 
""" if self.tz is None or is_utc(self.tz): - DAY_NS = DAY_SECONDS * 1000000000 + DAY_NS = DAY_SECONDS * 1_000_000_000 normalized_value = self.value - (self.value % DAY_NS) return Timestamp(normalized_value).tz_localize(self.tz) normalized_value = normalize_i8_timestamps( @@ -1113,7 +1112,7 @@ cdef int64_t _NS_UPPER_BOUND = np.iinfo(np.int64).max # INT64_MIN + 1 == -9223372036854775807 # but to allow overflow free conversion with a microsecond resolution # use the smallest value with a 0 nanosecond unit (0s in last 3 digits) -cdef int64_t _NS_LOWER_BOUND = -9223372036854775000 +cdef int64_t _NS_LOWER_BOUND = -9_223_372_036_854_775_000 # Resolution is in nanoseconds Timestamp.min = Timestamp(_NS_LOWER_BOUND) From b512ed5c44c27c05b5c90c5c6aaa05e34bd6d60f Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Fri, 28 Feb 2020 18:41:25 +0200 Subject: [PATCH 195/388] CLN: _libs.interval looping with cdef index (#32329) * CLN: using "n" variable to loop * cdef for "i" as index and for "n" as the length --- pandas/_libs/interval.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 55999f2d6fd74..2240c821cd239 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -501,14 +501,14 @@ def intervals_to_interval_bounds(ndarray intervals, bint validate_closed=True): """ cdef: object closed = None, interval - int64_t n = len(intervals) + Py_ssize_t i, n = len(intervals) ndarray left, right bint seen_closed = False left = np.empty(n, dtype=intervals.dtype) right = np.empty(n, dtype=intervals.dtype) - for i in range(len(intervals)): + for i in range(n): interval = intervals[i] if interval is None or util.is_nan(interval): left[i] = np.nan From 48abf99d2034e01be41b8c75b58ec8a821acb9b9 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 29 Feb 2020 00:56:20 +0000 Subject: [PATCH 196/388] TST/CLN: Follow-up to #31867 (#32324) --- pandas/tests/indexing/test_floats.py | 55 ++++++---------------------- 1 file changed, 11 insertions(+), 44 deletions(-) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index c966962a7c87d..fff5ca03e80f4 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -95,43 +95,14 @@ def test_scalar_non_numeric(self, index_func, klass): s = gen_obj(klass, i) # getting - for idxr, getitem in [(lambda x: x.iloc, False), (lambda x: x, True)]: + with pytest.raises(KeyError, match="^3.0$"): + s[3.0] - if getitem: - error = KeyError - msg = r"^3\.0?$" - else: - error = TypeError - msg = ( - r"cannot do (label|positional) indexing " - fr"on {type(i).__name__} with these indexers \[3\.0\] of " - r"type float|" - "Cannot index by location index with a " - "non-integer key" - ) - with pytest.raises(error, match=msg): - idxr(s)[3.0] - - # label based can be a TypeError or KeyError - if s.index.inferred_type in { - "categorical", - "string", - "unicode", - "mixed", - "period", - "timedelta64", - "datetime64", - }: - error = KeyError - msg = r"^3\.0$" - else: - error = TypeError - msg = ( - r"cannot do (label|positional) indexing " - fr"on {type(i).__name__} with these indexers \[3\.0\] of " - "type float" - ) - with pytest.raises(error, match=msg): + msg = "Cannot index by location index with a non-integer key" + with pytest.raises(TypeError, match=msg): + s.iloc[3.0] + + with pytest.raises(KeyError, match="^3.0$"): s.loc[3.0] # contains @@ -190,16 +161,12 @@ def 
test_scalar_with_mixed(self): s2 = Series([1, 2, 3], index=["a", "b", "c"]) s3 = Series([1, 2, 3], index=["a", "b", 1.5]) - # lookup in a pure string index - # with an invalid indexer - msg = ( - r"cannot do label indexing " - r"on Index with these indexers \[1\.0\] of " - r"type float|" - "Cannot index by location index with a non-integer key" - ) + # lookup in a pure string index with an invalid indexer + with pytest.raises(KeyError, match="^1.0$"): s2[1.0] + + msg = "Cannot index by location index with a non-integer key" with pytest.raises(TypeError, match=msg): s2.iloc[1.0] From 7d9c20073af96fe9704844deee37289fbf0985c7 Mon Sep 17 00:00:00 2001 From: "Joaquim L. Viegas" Date: Sat, 29 Feb 2020 03:07:23 +0000 Subject: [PATCH 197/388] DOC: Fixed reference to `convert_dtypes` in `to_numeric` (#32295) (#32333) --- pandas/core/tools/numeric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 4939cbfc9cc96..40f376724bd39 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -70,7 +70,7 @@ def to_numeric(arg, errors="raise", downcast=None): to_datetime : Convert argument to datetime. to_timedelta : Convert argument to timedelta. numpy.ndarray.astype : Cast a numpy array to a specified type. - convert_dtypes : Convert dtypes. + DataFrame.convert_dtypes : Convert dtypes. Examples -------- From aeeca53dc88c038f1afb546411a1835092bd1c7b Mon Sep 17 00:00:00 2001 From: Chuanzhu Xu Date: Sat, 29 Feb 2020 11:16:32 -0500 Subject: [PATCH 198/388] TYP: Update type naming in formatter (#32345) --- pandas/core/frame.py | 4 ++-- pandas/io/formats/format.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f304fadbab871..f515b57e24cfa 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -758,8 +758,8 @@ def to_string( header: Union[bool, Sequence[str]] = True, index: bool = True, na_rep: str = "NaN", - formatters: Optional[fmt.formatters_type] = None, - float_format: Optional[fmt.float_format_type] = None, + formatters: Optional[fmt.FormattersType] = None, + float_format: Optional[fmt.FloatFormatType] = None, sparsify: Optional[bool] = None, index_names: bool = True, justify: Optional[str] = None, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b5ddd15c1312a..2a528781f8c93 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -81,10 +81,10 @@ if TYPE_CHECKING: from pandas import Series, DataFrame, Categorical -formatters_type = Union[ +FormattersType = Union[ List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable] ] -float_format_type = Union[str, Callable, "EngFormatter"] +FloatFormatType = Union[str, Callable, "EngFormatter"] common_docstring = """ Parameters ---------- @@ -455,7 +455,7 @@ class TableFormatter: show_dimensions: Union[bool, str] is_truncated: bool - formatters: formatters_type + formatters: FormattersType columns: Index @property @@ -548,9 +548,9 @@ def __init__( header: Union[bool, Sequence[str]] = True, index: bool = True, na_rep: str = "NaN", - formatters: Optional[formatters_type] = None, + formatters: Optional[FormattersType] = None, justify: Optional[str] = None, - float_format: Optional[float_format_type] = None, + float_format: Optional[FloatFormatType] = None, sparsify: Optional[bool] = None, index_names: bool = True, line_width: Optional[int] = None, @@ -1089,7 +1089,7 @@ def _get_column_name_list(self) -> List[str]: def
format_array( values: Any, formatter: Optional[Callable], - float_format: Optional[float_format_type] = None, + float_format: Optional[FloatFormatType] = None, na_rep: str = "NaN", digits: Optional[int] = None, space: Optional[Union[str, int]] = None, @@ -1171,7 +1171,7 @@ def __init__( formatter: Optional[Callable] = None, na_rep: str = "NaN", space: Union[str, int] = 12, - float_format: Optional[float_format_type] = None, + float_format: Optional[FloatFormatType] = None, justify: str = "right", decimal: str = ".", quoting: Optional[int] = None, @@ -1278,7 +1278,7 @@ def __init__(self, *args, **kwargs): def _value_formatter( self, - float_format: Optional[float_format_type] = None, + float_format: Optional[FloatFormatType] = None, threshold: Optional[Union[float, int]] = None, ) -> Callable: """Returns a function to be applied on each value to format it""" @@ -1372,7 +1372,7 @@ def format_values_with(float_format): # There is a special default string when we are fixed-width # The default is otherwise to use str instead of a formatting string - float_format: Optional[float_format_type] + float_format: Optional[FloatFormatType] if self.float_format is None: if self.fixed_width: float_format = partial( From 89f3f4ea7ecb80f346bfe2cfd38fd9f24d6f9fbd Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Sat, 29 Feb 2020 17:17:47 +0000 Subject: [PATCH 199/388] print merged df result (#32370) --- doc/source/user_guide/merging.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst index 8302b5c5dea60..49f4bbb6beb19 100644 --- a/doc/source/user_guide/merging.rst +++ b/doc/source/user_guide/merging.rst @@ -742,7 +742,7 @@ as shown in the following example. ) ser - result = pd.merge(df, ser.reset_index(), on=['Let', 'Num']) + pd.merge(df, ser.reset_index(), on=['Let', 'Num']) Here is another example with duplicate join keys in DataFrames: From 43321fa6d36f943551c0ae65bb0511824e60f9fa Mon Sep 17 00:00:00 2001 From: Nathanael Date: Sun, 1 Mar 2020 00:27:49 +0700 Subject: [PATCH 200/388] DOC: Fix SS06 formatting errors in merge_asof docstrings (#32351) --- pandas/core/reshape/merge.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 49ac1b6cfa52b..faac472b3fc31 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -320,10 +320,10 @@ def merge_asof( direction: str = "backward", ) -> "DataFrame": """ - Perform an asof merge. This is similar to a left-join except that we - match on nearest key rather than equal keys. + Perform an asof merge. - Both DataFrames must be sorted by the key. + This is similar to a left-join except that we match on nearest + key rather than equal keys. Both DataFrames must be sorted by the key. 
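# Illustrative sketch of the nearest-key matching described above (not from
# the patch; uses the default backward direction):
#
#   >>> left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
#   >>> right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]})
#   >>> pd.merge_asof(left, right, on="a")
#       a left_val  right_val
#   0   1        a          1
#   1   5        b          3
#   2  10        c          7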
For each row in the left DataFrame: From 712000eee1b8602a418db43ed9b427e7a284bc26 Mon Sep 17 00:00:00 2001 From: Zaky Bilfagih <55378008+mackarelfish@users.noreply.github.com> Date: Sun, 1 Mar 2020 00:43:45 +0700 Subject: [PATCH 201/388] Changed kind parameter from integer to int, Added example (#32361) --- pandas/core/arrays/sparse/array.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 542cfd334b810..549606795f528 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -255,6 +255,16 @@ class SparseArray(PandasObject, ExtensionArray, ExtensionOpsMixin): Methods ------- None + + Examples + -------- + >>> from pandas.arrays import SparseArray + >>> arr = SparseArray([0, 0, 1, 2]) + >>> arr + [0, 0, 1, 2] + Fill: 0 + IntIndex + Indices: array([2, 3], dtype=int32) """ _pandas_ftype = "sparse" From edc27e43c67cf87e042adb41547441e6dae2dfa0 Mon Sep 17 00:00:00 2001 From: tolhassianipar <45849317+tolhassianipar@users.noreply.github.com> Date: Sun, 1 Mar 2020 00:55:17 +0700 Subject: [PATCH 202/388] solve 'check_category_order' description ends with '.' (#32358) --- pandas/_testing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index a70f75d6cfaf4..fce06e216dfd7 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -1106,7 +1106,7 @@ def assert_series_equal( check_categorical : bool, default True Whether to compare internal Categorical exactly. check_category_order : bool, default True - Whether to compare category order of internal Categoricals + Whether to compare category order of internal Categoricals. .. versionadded:: 1.0.2 obj : str, default 'Series' From 183129f45d11b48c886c3f3fd97331ac5a77175c Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Sat, 29 Feb 2020 20:04:06 +0200 Subject: [PATCH 203/388] Silence warnings when compiling pandas/_libs/parsers.pyx (#32368) --- pandas/_libs/parsers.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 3077f73a8d1a4..2fd227694800c 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -701,7 +701,7 @@ cdef class TextReader: char *word object name, old_name int status - uint64_t hr, data_line + uint64_t hr, data_line = 0 char *errors = "strict" StringPath path = _string_path(self.c_encoding) From 01f399854f9febefa9e97005f3720aa312409b98 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 29 Feb 2020 11:36:30 -0800 Subject: [PATCH 204/388] TST/REF: move tools test files (#32338) --- .../datetimes/test_tools.py => tools/test_to_datetime.py} | 0 pandas/tests/tools/{test_numeric.py => test_to_numeric.py} | 0 .../timedeltas/test_tools.py => tools/test_to_timedelta.py} | 0 setup.cfg | 2 +- 4 files changed, 1 insertion(+), 1 deletion(-) rename pandas/tests/{indexes/datetimes/test_tools.py => tools/test_to_datetime.py} (100%) rename pandas/tests/tools/{test_numeric.py => test_to_numeric.py} (100%) rename pandas/tests/{indexes/timedeltas/test_tools.py => tools/test_to_timedelta.py} (100%) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/tools/test_to_datetime.py similarity index 100% rename from pandas/tests/indexes/datetimes/test_tools.py rename to pandas/tests/tools/test_to_datetime.py diff --git a/pandas/tests/tools/test_numeric.py b/pandas/tests/tools/test_to_numeric.py similarity index 100% 
rename from pandas/tests/tools/test_numeric.py rename to pandas/tests/tools/test_to_numeric.py diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/tools/test_to_timedelta.py similarity index 100% rename from pandas/tests/indexes/timedeltas/test_tools.py rename to pandas/tests/tools/test_to_timedelta.py diff --git a/setup.cfg b/setup.cfg index 61d5b1030a500..bbd8489622005 100644 --- a/setup.cfg +++ b/setup.cfg @@ -135,7 +135,7 @@ ignore_errors=True [mypy-pandas.tests.arithmetic.test_datetime64] ignore_errors=True -[mypy-pandas.tests.indexes.datetimes.test_tools] +[mypy-pandas.tests.tools.test_to_datetime] ignore_errors=True [mypy-pandas.tests.scalar.period.test_period] ignore_errors=True From 0ce463d6bcef33ae0d2b2f877034a21216af99ea Mon Sep 17 00:00:00 2001 From: dan1261 <42591328+dan1261@users.noreply.github.com> Date: Sun, 1 Mar 2020 03:24:43 +0100 Subject: [PATCH 205/388] DOC: Minor typo fixes for code style guide (#32379) --- doc/source/development/code_style.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/source/development/code_style.rst b/doc/source/development/code_style.rst index 17f8783f71bfb..1b223cf5f026b 100644 --- a/doc/source/development/code_style.rst +++ b/doc/source/development/code_style.rst @@ -21,7 +21,7 @@ Patterns foo.__class__ ------------- -*pandas* uses 'type(foo)' instead 'foo.__class__' as it is making the code more +*pandas* uses 'type(foo)' instead of 'foo.__class__' as it makes the code more readable. For example: @@ -52,8 +52,8 @@ f-strings *pandas* uses f-strings formatting instead of '%' and '.format()' string formatters. -The convention of using f-strings on a string that is concatenated over serveral lines, -is to prefix only the lines containing the value needs to be interpeted. +The convention of using f-strings on a string that is concatenated over several lines, +is to prefix only the lines containing values which need to be interpreted. For example: @@ -86,8 +86,8 @@ For example: White spaces ~~~~~~~~~~~~ -Putting the white space only at the end of the previous line, so -there is no whitespace at the beggining of the concatenated string. +Only put white space at the end of the previous line, so +there is no whitespace at the beginning of the concatenated string. For example: @@ -116,7 +116,7 @@ Representation function (aka 'repr()') *pandas* uses 'repr()' instead of '%r' and '!r'. -The use of 'repr()' will only happend when the value is not an obvious string. +The use of 'repr()' will only happen when the value is not an obvious string. For example: @@ -138,7 +138,7 @@ For example: Imports (aim for absolute) ========================== -In Python 3, absolute imports are recommended. In absolute import doing something +In Python 3, absolute imports are recommended. Using absolute imports, doing something like ``import string`` will import the string module rather than ``string.py`` in the same directory. As much as possible, you should try to write out absolute imports that show the whole import chain from top-level pandas.
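An illustrative sketch of the f-string convention the guide above describes
(example strings assumed, not taken from code_style.rst): only the fragment
that interpolates a value gets the ``f`` prefix.

    foo = "old_function_name"

    # discouraged: prefixing every fragment
    msg = (
        f"Warning, {foo} is deprecated, "
        f"please use new_function_name instead."
    )

    # preferred: prefix only the fragment containing the interpolated value
    msg = (
        f"Warning, {foo} is deprecated, "
        "please use new_function_name instead."
    )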
From 7e6b583d0b583b960d12eda828de7f62124f9778 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Mar 2020 03:18:36 -0800 Subject: [PATCH 206/388] REF: collect+parametrize reorder_levels tests (#32373) --- pandas/conftest.py | 14 ++++ pandas/tests/frame/test_alter_axes.py | 38 ---------- .../generic/methods/test_reorder_levels.py | 73 +++++++++++++++++++ pandas/tests/indexing/multiindex/conftest.py | 15 ---- pandas/tests/series/test_alter_axes.py | 26 ------- pandas/tests/test_multilevel.py | 19 ----- 6 files changed, 87 insertions(+), 98 deletions(-) create mode 100644 pandas/tests/generic/methods/test_reorder_levels.py diff --git a/pandas/conftest.py b/pandas/conftest.py index be44e6c2b36da..d6b9576d7729f 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1057,3 +1057,17 @@ def index_or_series_obj(request): copy to avoid mutation, e.g. setting .name """ return _index_or_series_objs[request.param].copy(deep=True) + + +@pytest.fixture +def multiindex_year_month_day_dataframe_random_data(): + """ + DataFrame with 3 level MultiIndex (year, month, day) covering + first 100 business days from 2000-01-01 with random data + """ + tdf = tm.makeTimeDataFrame(100) + ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum() + # use Int64Index, to make sure things work + ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels], inplace=True) + ymd.index.set_names(["year", "month", "day"], inplace=True) + return ymd diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 34df8bb57dd91..e37170d4155f8 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -689,44 +689,6 @@ def test_rename_axis_mapper(self): with pytest.raises(TypeError, match="bogus"): df.rename_axis(bogus=None) - def test_reorder_levels(self): - index = MultiIndex( - levels=[["bar"], ["one", "two", "three"], [0, 1]], - codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], - names=["L0", "L1", "L2"], - ) - df = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=index) - - # no change, position - result = df.reorder_levels([0, 1, 2]) - tm.assert_frame_equal(df, result) - - # no change, labels - result = df.reorder_levels(["L0", "L1", "L2"]) - tm.assert_frame_equal(df, result) - - # rotate, position - result = df.reorder_levels([1, 2, 0]) - e_idx = MultiIndex( - levels=[["one", "two", "three"], [0, 1], ["bar"]], - codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1], [0, 0, 0, 0, 0, 0]], - names=["L1", "L2", "L0"], - ) - expected = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=e_idx) - tm.assert_frame_equal(result, expected) - - result = df.reorder_levels([0, 0, 0]) - e_idx = MultiIndex( - levels=[["bar"], ["bar"], ["bar"]], - codes=[[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]], - names=["L0", "L0", "L0"], - ) - expected = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=e_idx) - tm.assert_frame_equal(result, expected) - - result = df.reorder_levels(["L0", "L0", "L0"]) - tm.assert_frame_equal(result, expected) - def test_set_index_names(self): df = tm.makeDataFrame() df.index.name = "name" diff --git a/pandas/tests/generic/methods/test_reorder_levels.py b/pandas/tests/generic/methods/test_reorder_levels.py new file mode 100644 index 0000000000000..8bb6417e56659 --- /dev/null +++ b/pandas/tests/generic/methods/test_reorder_levels.py @@ -0,0 +1,73 @@ +import numpy as np +import pytest + +from pandas import DataFrame, MultiIndex, Series +import pandas._testing as tm + 
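# For orientation (an illustrative sketch, not lines of the new test file):
# reorder_levels permutes MultiIndex levels by position or by name, e.g.
#
#   >>> idx = MultiIndex.from_arrays([["a", "b"], [1, 2]], names=["x", "y"])
#   >>> Series([0, 1], index=idx).reorder_levels(["y", "x"]).index.names
#   FrozenList(['y', 'x'])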
+ +class TestReorderLevels: + @pytest.mark.parametrize("klass", [Series, DataFrame]) + def test_reorder_levels(self, klass): + index = MultiIndex( + levels=[["bar"], ["one", "two", "three"], [0, 1]], + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + names=["L0", "L1", "L2"], + ) + df = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=index) + obj = df if klass is DataFrame else df["A"] + + # no change, position + result = obj.reorder_levels([0, 1, 2]) + tm.assert_equal(obj, result) + + # no change, labels + result = obj.reorder_levels(["L0", "L1", "L2"]) + tm.assert_equal(obj, result) + + # rotate, position + result = obj.reorder_levels([1, 2, 0]) + e_idx = MultiIndex( + levels=[["one", "two", "three"], [0, 1], ["bar"]], + codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1], [0, 0, 0, 0, 0, 0]], + names=["L1", "L2", "L0"], + ) + expected = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=e_idx) + expected = expected if klass is DataFrame else expected["A"] + tm.assert_equal(result, expected) + + result = obj.reorder_levels([0, 0, 0]) + e_idx = MultiIndex( + levels=[["bar"], ["bar"], ["bar"]], + codes=[[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]], + names=["L0", "L0", "L0"], + ) + expected = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=e_idx) + expected = expected if klass is DataFrame else expected["A"] + tm.assert_equal(result, expected) + + result = obj.reorder_levels(["L0", "L0", "L0"]) + tm.assert_equal(result, expected) + + def test_reorder_levels_swaplevel_equivalence( + self, multiindex_year_month_day_dataframe_random_data + ): + + ymd = multiindex_year_month_day_dataframe_random_data + + result = ymd.reorder_levels(["month", "day", "year"]) + expected = ymd.swaplevel(0, 1).swaplevel(1, 2) + tm.assert_frame_equal(result, expected) + + result = ymd["A"].reorder_levels(["month", "day", "year"]) + expected = ymd["A"].swaplevel(0, 1).swaplevel(1, 2) + tm.assert_series_equal(result, expected) + + result = ymd.T.reorder_levels(["month", "day", "year"], axis=1) + expected = ymd.T.swaplevel(0, 1, axis=1).swaplevel(1, 2, axis=1) + tm.assert_frame_equal(result, expected) + + with pytest.raises(TypeError, match="hierarchical axis"): + ymd.reorder_levels([1, 2], axis=1) + + with pytest.raises(IndexError, match="Too many levels"): + ymd.index.reorder_levels([1, 2, 3]) diff --git a/pandas/tests/indexing/multiindex/conftest.py b/pandas/tests/indexing/multiindex/conftest.py index 0256f5e35e1db..c69d6f86a6ce6 100644 --- a/pandas/tests/indexing/multiindex/conftest.py +++ b/pandas/tests/indexing/multiindex/conftest.py @@ -2,7 +2,6 @@ import pytest from pandas import DataFrame, Index, MultiIndex -import pandas._testing as tm @pytest.fixture @@ -16,17 +15,3 @@ def multiindex_dataframe_random_data(): return DataFrame( np.random.randn(10, 3), index=index, columns=Index(["A", "B", "C"], name="exp") ) - - -@pytest.fixture -def multiindex_year_month_day_dataframe_random_data(): - """ - DataFrame with 3 level MultiIndex (year, month, day) covering - first 100 business days from 2000-01-01 with random data - """ - tdf = tm.makeTimeDataFrame(100) - ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum() - # use Int64Index, to make sure things work - ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels], inplace=True) - ymd.index.set_names(["year", "month", "day"], inplace=True) - return ymd diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index f6ca93b0c2882..769d1ed877a69 
100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -54,32 +54,6 @@ def test_set_index_makes_timeseries(self): s.index = idx assert s.index.is_all_dates - def test_reorder_levels(self): - index = MultiIndex( - levels=[["bar"], ["one", "two", "three"], [0, 1]], - codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], - names=["L0", "L1", "L2"], - ) - s = Series(np.arange(6), index=index) - - # no change, position - result = s.reorder_levels([0, 1, 2]) - tm.assert_series_equal(s, result) - - # no change, labels - result = s.reorder_levels(["L0", "L1", "L2"]) - tm.assert_series_equal(s, result) - - # rotate, position - result = s.reorder_levels([1, 2, 0]) - e_idx = MultiIndex( - levels=[["one", "two", "three"], [0, 1], ["bar"]], - codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1], [0, 0, 0, 0, 0, 0]], - names=["L1", "L2", "L0"], - ) - expected = Series(np.arange(6), index=e_idx) - tm.assert_series_equal(result, expected) - def test_rename_axis_mapper(self): # GH 19978 mi = MultiIndex.from_product([["a", "b", "c"], [1, 2]], names=["ll", "nn"]) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index e3cf46b466ae4..84279d874bae1 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -939,25 +939,6 @@ def test_swaplevel(self): with pytest.raises(TypeError, match=msg): DataFrame(range(3)).swaplevel() - def test_reorder_levels(self): - result = self.ymd.reorder_levels(["month", "day", "year"]) - expected = self.ymd.swaplevel(0, 1).swaplevel(1, 2) - tm.assert_frame_equal(result, expected) - - result = self.ymd["A"].reorder_levels(["month", "day", "year"]) - expected = self.ymd["A"].swaplevel(0, 1).swaplevel(1, 2) - tm.assert_series_equal(result, expected) - - result = self.ymd.T.reorder_levels(["month", "day", "year"], axis=1) - expected = self.ymd.T.swaplevel(0, 1, axis=1).swaplevel(1, 2, axis=1) - tm.assert_frame_equal(result, expected) - - with pytest.raises(TypeError, match="hierarchical axis"): - self.ymd.reorder_levels([1, 2], axis=1) - - with pytest.raises(IndexError, match="Too many levels"): - self.ymd.index.reorder_levels([1, 2, 3]) - def test_insert_index(self): df = self.ymd[:5].T df[2000, 1, 10] = df[2000, 1, 7] From 8b347ea2672e3dcd34c3f699581ab5061706ec33 Mon Sep 17 00:00:00 2001 From: Rushabh Vasani Date: Mon, 2 Mar 2020 21:00:17 +0530 Subject: [PATCH 207/388] TST: Allow definition of `pd.CategoricalDtype` with a specific `categories.dtype` (#32115) --- pandas/core/dtypes/dtypes.py | 8 ++++++++ pandas/tests/dtypes/test_dtypes.py | 14 +++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 33daf6627721f..181f0c8906853 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -175,6 +175,8 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): ---------- categories : sequence, optional Must be unique, and must not contain any nulls. + The categories are stored in an Index, + and if an index is provided the dtype of that index will be used. ordered : bool or None, default False Whether or not this categorical is treated as a ordered categorical. None can be used to maintain the ordered value of existing categoricals when @@ -210,6 +212,12 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): 3 NaN dtype: category Categories (2, object): [b < a] + + An empty CategoricalDtype with a specific dtype can be created + by providing an empty index. 
As follows: + + >>> pd.CategoricalDtype(pd.DatetimeIndex([])).categories.dtype + dtype('<M8[ns]') Date: Mon, 2 Mar 2020 09:27:45 -0800 Subject: [PATCH 208/388] TYP: annotations for internals, set_axis (#32376) --- pandas/core/frame.py | 2 +- pandas/core/generic.py | 5 +++-- pandas/core/internals/managers.py | 20 ++++++++++---------- pandas/core/series.py | 23 +++++++++++++---------- 4 files changed, 27 insertions(+), 23 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f515b57e24cfa..61715397e8e0b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3599,7 +3599,7 @@ def align( see_also_sub=" or columns", ) @Appender(NDFrame.set_axis.__doc__) - def set_axis(self, labels, axis=0, inplace=False): + def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): return super().set_axis(labels, axis=axis, inplace=inplace) @Substitution(**_shared_doc_kwargs) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 25770c2c6470c..890c0540d9851 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -520,7 +520,7 @@ def _obj_with_exclusions(self: FrameOrSeries) -> FrameOrSeries: """ internal compat with SelectionMixin """ return self - def set_axis(self, labels, axis=0, inplace=False): + def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): """ Assign desired index to given axis. @@ -561,7 +561,8 @@ def set_axis(self, labels, axis=0, inplace=False): obj.set_axis(labels, axis=axis, inplace=True) return obj - def _set_axis(self, axis, labels) -> None: + def _set_axis(self, axis: int, labels: Index) -> None: + labels = ensure_index(labels) self._data.set_axis(axis, labels) self._clear_item_cache() diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index e397167e4881f..9c90b20fc0f16 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -164,15 +164,15 @@ def __nonzero__(self): __bool__ = __nonzero__ @property - def shape(self): + def shape(self) -> Tuple[int, ...]: return tuple(len(ax) for ax in self.axes) @property def ndim(self) -> int: return len(self.axes) - def set_axis(self, axis, new_labels): - new_labels = ensure_index(new_labels) + def set_axis(self, axis: int, new_labels: Index): + # Caller is responsible for ensuring we have an Index object. old_len = len(self.axes[axis]) new_len = len(new_labels) @@ -184,7 +184,9 @@ def set_axis(self, axis, new_labels): self.axes[axis] = new_labels - def rename_axis(self, mapper, axis, copy: bool = True, level=None): + def rename_axis( + self, mapper, axis: int, copy: bool = True, level=None + ) -> "BlockManager": """ Rename one of axes.
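# Illustrative usage sketch for the annotated set_axis API above (not from
# the patch):
#
#   >>> df = pd.DataFrame({"a": [1, 2]})
#   >>> df.set_axis(["x", "y"], axis=0, inplace=False).index.tolist()
#   ['x', 'y']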
@@ -193,7 +195,7 @@ def rename_axis(self, mapper, axis, copy: bool = True, level=None): mapper : unary callable axis : int copy : bool, default True - level : int, default None + level : int or None, default None """ obj = self.copy(deep=copy) obj.set_axis(axis, _transform_index(self.axes[axis], mapper, level)) @@ -233,7 +235,7 @@ def _rebuild_blknos_and_blklocs(self): self._blklocs = new_blklocs @property - def items(self): + def items(self) -> Index: return self.axes[0] def _get_counts(self, f): @@ -623,7 +625,7 @@ def comp(s, regex=False): bm._consolidate_inplace() return bm - def is_consolidated(self): + def is_consolidated(self) -> bool: """ Return True if more than one block with the same dtype """ @@ -688,7 +690,7 @@ def get_numeric_data(self, copy: bool = False): self._consolidate_inplace() return self.combine([b for b in self.blocks if b.is_numeric], copy) - def combine(self, blocks, copy=True): + def combine(self, blocks: List[Block], copy: bool = True) -> "BlockManager": """ return a new manager with the blocks """ if len(blocks) == 0: return self.make_empty() @@ -992,7 +994,7 @@ def delete(self, item): self.blocks = tuple( b for blkno, b in enumerate(self.blocks) if not is_blk_deleted[blkno] ) - self._shape = None self._rebuild_blknos_and_blklocs() def set(self, item, value): @@ -1160,7 +1161,6 @@ def insert(self, loc: int, item, value, allow_duplicates: bool = False): self.axes[0] = new_axis self.blocks += (block,) - self._shape = None self._known_consolidated = False diff --git a/pandas/core/series.py b/pandas/core/series.py index d984225f8fd89..d565cbbdd5344 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -9,7 +9,6 @@ TYPE_CHECKING, Any, Callable, - Hashable, Iterable, List, Optional, @@ -23,7 +22,7 @@ from pandas._config import get_option from pandas._libs import lib, properties, reshape, tslibs -from pandas._typing import Label +from pandas._typing import Axis, DtypeObj, Label from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, doc from pandas.util._validators import validate_bool_kwarg, validate_percentile @@ -177,7 +176,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): _typ = "series" - _name: Optional[Hashable] + _name: Label _metadata: List[str] = ["name"] _internal_names_set = {"index"} | generic.NDFrame._internal_names_set _accessors = {"dt", "cat", "str", "sparse"} @@ -391,9 +390,12 @@ def _can_hold_na(self): _index = None - def _set_axis(self, axis, labels, fastpath: bool = False) -> None: + def _set_axis(self, axis: int, labels, fastpath: bool = False) -> None: """ Override generic, we want to set the _typ here. + + This is called from the cython code when we set the `index` attribute + directly, e.g. `series.index = [1, 2, 3]`. """ if not fastpath: labels = ensure_index(labels) @@ -413,6 +415,7 @@ def _set_axis(self, axis, labels, fastpath: bool = False) -> None: object.__setattr__(self, "_index", labels) if not fastpath: + # The ensure_index call above ensures we have an Index object self._data.set_axis(axis, labels) def _update_inplace(self, result, **kwargs): @@ -421,25 +424,25 @@ def _update_inplace(self, result, **kwargs): # ndarray compatibility @property - def dtype(self): + def dtype(self) -> DtypeObj: """ Return the dtype object of the underlying data. """ return self._data.dtype @property - def dtypes(self): + def dtypes(self) -> DtypeObj: """ Return the dtype object of the underlying data.
""" return self._data.dtype @property - def name(self) -> Optional[Hashable]: + def name(self) -> Label: return self._name @name.setter - def name(self, value: Optional[Hashable]) -> None: + def name(self, value: Label) -> None: if not is_hashable(value): raise TypeError("Series.name must be a hashable type") object.__setattr__(self, "_name", value) @@ -689,7 +692,7 @@ def __array_ufunc__( inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs) result = getattr(ufunc, method)(*inputs, **kwargs) - name: Optional[Hashable] + name: Label if len(set(names)) == 1: name = names[0] else: @@ -3983,7 +3986,7 @@ def rename( see_also_sub="", ) @Appender(generic.NDFrame.set_axis.__doc__) - def set_axis(self, labels, axis=0, inplace=False): + def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): return super().set_axis(labels, axis=axis, inplace=inplace) @Substitution(**_shared_doc_kwargs) From f25ed6f2b1898627c4cf4cb68d8be96387142c4d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Mar 2020 09:47:08 -0800 Subject: [PATCH 209/388] misplaced DataFrame.join test (#32375) --- pandas/tests/frame/test_combine_concat.py | 73 +-------------------- pandas/tests/frame/test_join.py | 76 ++++++++++++++++++++++ pandas/tests/series/test_combine_concat.py | 2 +- 3 files changed, 78 insertions(+), 73 deletions(-) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 321eb5fe94daf..7eba2b873c4f4 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -8,7 +8,7 @@ import pandas._testing as tm -class TestDataFrameConcatCommon: +class TestDataFrameConcat: def test_concat_multiple_frames_dtypes(self): # GH 2759 @@ -107,77 +107,6 @@ def test_concat_tuple_keys(self): ) tm.assert_frame_equal(results, expected) - def test_join_str_datetime(self): - str_dates = ["20120209", "20120222"] - dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] - - A = DataFrame(str_dates, index=range(2), columns=["aa"]) - C = DataFrame([[1, 2], [3, 4]], index=str_dates, columns=dt_dates) - - tst = A.join(C, on="aa") - - assert len(tst.columns) == 3 - - def test_join_multiindex_leftright(self): - # GH 10741 - df1 = pd.DataFrame( - [ - ["a", "x", 0.471780], - ["a", "y", 0.774908], - ["a", "z", 0.563634], - ["b", "x", -0.353756], - ["b", "y", 0.368062], - ["b", "z", -1.721840], - ["c", "x", 1], - ["c", "y", 2], - ["c", "z", 3], - ], - columns=["first", "second", "value1"], - ).set_index(["first", "second"]) - - df2 = pd.DataFrame( - [["a", 10], ["b", 20]], columns=["first", "value2"] - ).set_index(["first"]) - - exp = pd.DataFrame( - [ - [0.471780, 10], - [0.774908, 10], - [0.563634, 10], - [-0.353756, 20], - [0.368062, 20], - [-1.721840, 20], - [1.000000, np.nan], - [2.000000, np.nan], - [3.000000, np.nan], - ], - index=df1.index, - columns=["value1", "value2"], - ) - - # these must be the same results (but columns are flipped) - tm.assert_frame_equal(df1.join(df2, how="left"), exp) - tm.assert_frame_equal(df2.join(df1, how="right"), exp[["value2", "value1"]]) - - exp_idx = pd.MultiIndex.from_product( - [["a", "b"], ["x", "y", "z"]], names=["first", "second"] - ) - exp = pd.DataFrame( - [ - [0.471780, 10], - [0.774908, 10], - [0.563634, 10], - [-0.353756, 20], - [0.368062, 20], - [-1.721840, 20], - ], - index=exp_idx, - columns=["value1", "value2"], - ) - - tm.assert_frame_equal(df1.join(df2, how="right"), exp) - tm.assert_frame_equal(df2.join(df1, how="left"), exp[["value2", "value1"]]) - def 
test_concat_named_keys(self): # GH 14252 df = pd.DataFrame({"foo": [1, 2], "bar": [0.1, 0.2]}) diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py index 8c388a887158f..4d6e675c6765f 100644 --- a/pandas/tests/frame/test_join.py +++ b/pandas/tests/frame/test_join.py @@ -1,6 +1,9 @@ +from datetime import datetime + import numpy as np import pytest +import pandas as pd from pandas import DataFrame, Index, period_range import pandas._testing as tm @@ -216,3 +219,76 @@ def test_suppress_future_warning_with_sort_kw(sort_kw): with tm.assert_produces_warning(None, check_stacklevel=False): result = a.join([b, c], how="outer", sort=sort_kw) tm.assert_frame_equal(result, expected) + + +class TestDataFrameJoin: + def test_join_str_datetime(self): + str_dates = ["20120209", "20120222"] + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + A = DataFrame(str_dates, index=range(2), columns=["aa"]) + C = DataFrame([[1, 2], [3, 4]], index=str_dates, columns=dt_dates) + + tst = A.join(C, on="aa") + + assert len(tst.columns) == 3 + + def test_join_multiindex_leftright(self): + # GH 10741 + df1 = pd.DataFrame( + [ + ["a", "x", 0.471780], + ["a", "y", 0.774908], + ["a", "z", 0.563634], + ["b", "x", -0.353756], + ["b", "y", 0.368062], + ["b", "z", -1.721840], + ["c", "x", 1], + ["c", "y", 2], + ["c", "z", 3], + ], + columns=["first", "second", "value1"], + ).set_index(["first", "second"]) + + df2 = pd.DataFrame( + [["a", 10], ["b", 20]], columns=["first", "value2"] + ).set_index(["first"]) + + exp = pd.DataFrame( + [ + [0.471780, 10], + [0.774908, 10], + [0.563634, 10], + [-0.353756, 20], + [0.368062, 20], + [-1.721840, 20], + [1.000000, np.nan], + [2.000000, np.nan], + [3.000000, np.nan], + ], + index=df1.index, + columns=["value1", "value2"], + ) + + # these must be the same results (but columns are flipped) + tm.assert_frame_equal(df1.join(df2, how="left"), exp) + tm.assert_frame_equal(df2.join(df1, how="right"), exp[["value2", "value1"]]) + + exp_idx = pd.MultiIndex.from_product( + [["a", "b"], ["x", "y", "z"]], names=["first", "second"] + ) + exp = pd.DataFrame( + [ + [0.471780, 10], + [0.774908, 10], + [0.563634, 10], + [-0.353756, 20], + [0.368062, 20], + [-1.721840, 20], + ], + index=exp_idx, + columns=["value1", "value2"], + ) + + tm.assert_frame_equal(df1.join(df2, how="right"), exp) + tm.assert_frame_equal(df2.join(df1, how="left"), exp[["value2", "value1"]]) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index adb79f69c2d81..0766bfc37d7ca 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -5,7 +5,7 @@ from pandas import Series -class TestSeriesCombine: +class TestSeriesConcat: @pytest.mark.parametrize( "dtype", ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"] ) From 78c1a743969572a4fe496aa848fd0aa75e6240da Mon Sep 17 00:00:00 2001 From: Iqrar Agalosi Nureyza Date: Tue, 3 Mar 2020 05:52:14 +0700 Subject: [PATCH 210/388] DOC: Fixed ES01, PR07, SA04 error in pandas.core.groupby.DataFrameGroupBy.shift (#32356) --- pandas/core/groupby/groupby.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 48c00140461b5..6362f11a3e032 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2312,11 +2312,12 @@ def _get_cythonized_result( return self._wrap_transformed_output(output) @Substitution(name="groupby") - @Appender(_common_see_also) 
def shift(self, periods=1, freq=None, axis=0, fill_value=None): """ Shift each group by periods observations. + If freq is passed, the index will be increased using the periods and the freq. + Parameters ---------- periods : int, default 1 @@ -2324,7 +2325,9 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): freq : str, optional Frequency string. axis : axis to shift, default 0 + Shift direction. fill_value : optional + The scalar value to use for newly introduced missing values. .. versionadded:: 0.24.0 @@ -2332,6 +2335,12 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): ------- Series or DataFrame Object shifted within each group. + + See Also + -------- + Index.shift : Shift values of Index. + tshift : Shift the time index, using the index’s frequency + if available. """ if freq is not None or axis != 0 or not isna(fill_value): return self.apply(lambda x: x.shift(periods, freq, axis, fill_value)) From 3e5fe8ebea89b6969036e04f9fb402cfe01c86e8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 2 Mar 2020 17:13:32 -0600 Subject: [PATCH 211/388] BUG: Pickle NA objects (#32104) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to https://docs.python.org/3/library/pickle.html#object.__reduce__, > If a string is returned, the string should be interpreted as the name > of a global variable. It should be the object’s local name relative to > its module; the pickle module searches the module namespace to determine > the object’s module. This behaviour is typically useful for singletons. Closes https://github.com/pandas-dev/pandas/issues/31847 --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/_libs/missing.pyx | 3 +++ pandas/tests/scalar/test_na_scalar.py | 25 +++++++++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 1b6098e6b6ac1..808e6ae709ce9 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -74,6 +74,7 @@ Bug fixes **I/O** - Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`) +- Fixed pickling of ``pandas.NA``. Previously a new object was returned, which broke computations relying on ``NA`` being a singleton (:issue:`31847`) - Fixed bug in parquet roundtrip with nullable unsigned integer dtypes (:issue:`31896`). 
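An illustrative sketch of the singleton round-trip the pickling fix above
guarantees (not part of the patch): because ``NAType.__reduce__`` returns the
string ``"NA"``, unpickling resolves to the module-level singleton::

    import pickle
    import pandas as pd

    assert pickle.loads(pickle.dumps(pd.NA)) is pd.NA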
**Experimental dtypes** diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 4d17a6f883c1c..c54cb652d7b21 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -364,6 +364,9 @@ class NAType(C_NAType): exponent = 31 if is_32bit else 61 return 2 ** exponent - 1 + def __reduce__(self): + return "NA" + # Binary arithmetic and comparison ops -> propagate __add__ = _create_binary_propagating_op("__add__") diff --git a/pandas/tests/scalar/test_na_scalar.py b/pandas/tests/scalar/test_na_scalar.py index dcb9d66708724..07656de2e9062 100644 --- a/pandas/tests/scalar/test_na_scalar.py +++ b/pandas/tests/scalar/test_na_scalar.py @@ -1,3 +1,5 @@ +import pickle + import numpy as np import pytest @@ -267,3 +269,26 @@ def test_integer_hash_collision_set(): assert len(result) == 2 assert NA in result assert hash(NA) in result + + +def test_pickle_roundtrip(): + # https://github.com/pandas-dev/pandas/issues/31847 + result = pickle.loads(pickle.dumps(pd.NA)) + assert result is pd.NA + + +def test_pickle_roundtrip_pandas(): + result = tm.round_trip_pickle(pd.NA) + assert result is pd.NA + + +@pytest.mark.parametrize( + "values, dtype", [([1, 2, pd.NA], "Int64"), (["A", "B", pd.NA], "string")] +) +@pytest.mark.parametrize("as_frame", [True, False]) +def test_pickle_roundtrip_containers(as_frame, values, dtype): + s = pd.Series(pd.array(values, dtype=dtype)) + if as_frame: + s = s.to_frame(name="A") + result = tm.round_trip_pickle(s) + tm.assert_equal(result, s) From 68603b15f361bd18d500dfae7a5089981a5d5d19 Mon Sep 17 00:00:00 2001 From: Adrian Mastronardi <7809331+AdrianMastronardi@users.noreply.github.com> Date: Mon, 2 Mar 2020 22:22:02 -0300 Subject: [PATCH 212/388] DOC: Fix SA04 errors in docstrings #28792 (#32182) --- pandas/core/base.py | 13 ++++++++----- pandas/core/computation/eval.py | 6 ++++-- pandas/core/generic.py | 17 ++++++++++------- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index ff251324d4896..f55d9f905945d 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1035,7 +1035,8 @@ def argmin(self, axis=None, skipna=True, *args, **kwargs): See Also -------- - numpy.ndarray.argmin + numpy.ndarray.argmin : Return indices of the minimum values along + the given axis. """ nv.validate_minmax_axis(axis) nv.validate_argmax_with_skipna(skipna, args, kwargs) @@ -1055,7 +1056,8 @@ def tolist(self): See Also -------- - numpy.ndarray.tolist + numpy.ndarray.tolist : Return the array as an a.ndim-levels deep + nested list of Python scalars. """ if not isinstance(self._values, np.ndarray): # check for ndarray instead of dtype to catch DTA/TDA @@ -1402,7 +1404,8 @@ def memory_usage(self, deep=False): See Also -------- - numpy.ndarray.nbytes + numpy.ndarray.nbytes : Total bytes consumed by the elements of the + array. Notes ----- @@ -1473,8 +1476,8 @@ def factorize(self, sort=False, na_sentinel=-1): See Also -------- - sort_values - numpy.searchsorted + sort_values : Sort by the values along either axis. + numpy.searchsorted : Similar method from NumPy. Notes ----- diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index fe3d3f49f16a7..a488aac08e060 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -266,8 +266,10 @@ def eval( See Also -------- - DataFrame.query - DataFrame.eval + DataFrame.query : Evaluates a boolean expression to query the columns + of a frame. 
+ DataFrame.eval : Evaluate a string describing operations on + DataFrame columns. Notes ----- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 890c0540d9851..5461c134fa454 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1753,8 +1753,9 @@ def empty(self) -> bool_t: See Also -------- - Series.dropna - DataFrame.dropna + Series.dropna : Return series without null values. + DataFrame.dropna : Return DataFrame with labels on given axis omitted + where (all or any) data are missing. Notes ----- @@ -2173,7 +2174,7 @@ def to_json( See Also -------- - read_json + read_json : Convert a JSON string to pandas object. Notes ----- @@ -4452,7 +4453,8 @@ def filter( See Also -------- - DataFrame.loc + DataFrame.loc : Access a group of rows and columns + by label(s) or a boolean array. Notes ----- @@ -4884,9 +4886,10 @@ def sample( See Also -------- - DataFrame.apply - DataFrame.applymap - Series.map + DataFrame.apply : Apply a function along input axis of DataFrame. + DataFrame.applymap : Apply a function elementwise on a whole DataFrame. + Series.map : Apply a mapping correspondence on a + :class:`~pandas.Series`. Notes ----- From 712fd01dcbee3904a35298108cfe6ecef52ae676 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Mar 2020 17:34:30 -0800 Subject: [PATCH 213/388] CLN: remove _igetitem_cache (#32319) --- pandas/core/frame.py | 2 +- pandas/core/generic.py | 9 --------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 61715397e8e0b..bfb83b59497e6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2741,7 +2741,7 @@ def _set_value(self, index, col, value, takeable: bool = False): """ try: if takeable is True: - series = self._iget_item_cache(col) + series = self._ixs(col, axis=1) series._set_value(index, value, takeable=True) return diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5461c134fa454..d8fa829eece93 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3463,15 +3463,6 @@ def _get_item_cache(self, item): res._is_copy = self._is_copy return res - def _iget_item_cache(self, item: int): - """Return the cached item, item represents a positional indexer.""" - ax = self._info_axis - if ax.is_unique: - lower = self._get_item_cache(ax[item]) - else: - return self._ixs(item, axis=1) - return lower - def _box_item_values(self, key, values): raise AbstractMethodError(self) From 08deb10ed3bd2dceafa6e8e53d78230b5d84f66b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Mar 2020 17:36:18 -0800 Subject: [PATCH 214/388] Avoid unnecessary values_from_object (#32398) --- pandas/_libs/lib.pyx | 4 +--- pandas/core/common.py | 2 +- pandas/core/dtypes/missing.py | 2 +- pandas/core/reshape/merge.py | 3 --- pandas/core/strings.py | 3 +-- 5 files changed, 4 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 7a18429f21a18..61d6a660a0357 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -100,11 +100,9 @@ def values_from_object(obj: object): """ func: object - if getattr(obj, '_typ', '') == 'dataframe': - return obj.values - func = getattr(obj, '_internal_get_values', None) if func is not None: + # Includes DataFrame, for which we get frame.values obj = func() return obj diff --git a/pandas/core/common.py b/pandas/core/common.py index 705c618fc49dc..6230ee34bcd50 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -122,7 +122,7 @@ def is_bool_indexer(key: Any) -> bool: is_array_like(key) and 
is_extension_array_dtype(key.dtype) ): if key.dtype == np.object_: - key = np.asarray(values_from_object(key)) + key = np.asarray(key) if not lib.is_bool_array(key): na_msg = "Cannot mask with non-boolean array containing NA / NaN values" diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index ee74b02af9516..682a0722de3b7 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -581,7 +581,7 @@ def remove_na_arraylike(arr): if is_extension_array_dtype(arr): return arr[notna(arr)] else: - return arr[notna(lib.values_from_object(arr))] + return arr[notna(np.asarray(arr))] def is_valid_nat_for_dtype(obj, dtype: DtypeObj) -> bool: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index faac472b3fc31..d4d9c26686891 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1923,9 +1923,6 @@ def _factorize_keys(lk, rk, sort=True): def _sort_labels(uniques: np.ndarray, left, right): - if not isinstance(uniques, np.ndarray): - # tuplesafe - uniques = Index(uniques).values llength = len(left) labels = np.concatenate([left, right]) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index b0c5d6a48d99a..3be9c5fcdfb26 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -36,7 +36,6 @@ from pandas.core.algorithms import take_1d from pandas.core.base import NoNewAttributesMixin -import pandas.core.common as com from pandas.core.construction import extract_array if TYPE_CHECKING: @@ -782,7 +781,7 @@ def rep(x, r): return str.__mul__(x, r) repeats = np.asarray(repeats, dtype=object) - result = libops.vec_binop(com.values_from_object(arr), repeats, rep) + result = libops.vec_binop(np.asarray(arr), repeats, rep) return result From d219c2c63d29b3affbc0e67d7c3dbfb6946b54de Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Mar 2020 17:44:13 -0800 Subject: [PATCH 215/388] ENH: infer freq in timedelta_range (#32377) --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/_testing.py | 4 ++-- pandas/core/arrays/timedeltas.py | 12 ++++++++++-- .../tests/indexes/timedeltas/test_timedelta_range.py | 1 + 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 0f18a1fd81815..999dee83cf7e0 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -66,6 +66,7 @@ Other enhancements - :class:`Styler` may now render CSS more efficiently where multiple cells have the same styling (:issue:`30876`) - When writing directly to a sqlite connection :func:`to_sql` now supports the ``multi`` method (:issue:`29921`) - `OptionError` is now exposed in `pandas.errors` (:issue:`27553`) +- :func:`timedelta_range` will now infer a frequency when passed ``start``, ``stop``, and ``periods`` (:issue:`32377`) - .. --------------------------------------------------------------------------- diff --git a/pandas/_testing.py b/pandas/_testing.py index fce06e216dfd7..b0f18cb6fdd39 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -742,9 +742,9 @@ def repr_class(x): raise_assert_detail(obj, msg, repr_class(left), repr_class(right)) -def assert_attr_equal(attr, left, right, obj="Attributes"): +def assert_attr_equal(attr: str, left, right, obj: str = "Attributes"): """ - checks attributes are equal. Both objects must have attribute. + Check attributes are equal. Both objects must have attribute. 
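# Illustrative sketch of the timedelta_range frequency inference added in
# this patch (example values assumed):
#
#   >>> pd.timedelta_range(start="0 days", end="4 days", periods=5).freq
#   <Day>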
Parameters ---------- diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 81fc934748d3e..749489a0a04fb 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -39,6 +39,7 @@ from pandas.core.algorithms import checked_add_with_arr from pandas.core.arrays import datetimelike as dtl import pandas.core.common as com +from pandas.core.construction import extract_array from pandas.tseries.frequencies import to_offset from pandas.tseries.offsets import Tick @@ -141,8 +142,7 @@ def dtype(self): # Constructors def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False): - if isinstance(values, (ABCSeries, ABCIndexClass)): - values = values._values + values = extract_array(values) inferred_freq = getattr(values, "_freq", None) @@ -258,6 +258,10 @@ def _generate_range(cls, start, end, periods, freq, closed=None): index = _generate_regular_range(start, end, periods, freq) else: index = np.linspace(start.value, end.value, periods).astype("i8") + if len(index) >= 2: + # Infer a frequency + td = Timedelta(index[1] - index[0]) + freq = to_offset(td) if not left_closed: index = index[1:] @@ -614,6 +618,10 @@ def __floordiv__(self, other): if self.freq is not None: # Note: freq gets division, not floor-division freq = self.freq / other + if freq.nanos == 0 and self.freq.nanos != 0: + # e.g. if self.freq is Nano(1) then dividing by 2 + # rounds down to zero + freq = None return type(self)(result.view("m8[ns]"), freq=freq) if not hasattr(other, "dtype"): diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 9f12af9a96104..c07a6471c732f 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -38,6 +38,7 @@ def test_linspace_behavior(self, periods, freq): result = timedelta_range(start="0 days", end="4 days", periods=periods) expected = timedelta_range(start="0 days", end="4 days", freq=freq) tm.assert_index_equal(result, expected) + assert result.freq == freq def test_errors(self): # not enough params From bdcb5da0906f7d8c7437d8531b8e3219f1ef554c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Mar 2020 17:50:01 -0800 Subject: [PATCH 216/388] BUG: 2D DTA/TDA arithmetic with object-dtype (#32185) --- pandas/core/arrays/datetimelike.py | 21 ++++------- pandas/tests/arithmetic/test_datetime64.py | 41 +++++++++++++++++++++ pandas/tests/arithmetic/test_timedelta64.py | 14 +++++++ 3 files changed, 63 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index f637e16caa4c6..8c870c6255200 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -42,6 +42,7 @@ from pandas.core.algorithms import checked_add_with_arr, take, unique1d, value_counts from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin import pandas.core.common as com +from pandas.core.construction import array, extract_array from pandas.core.indexers import check_array_indexer from pandas.core.ops.common import unpack_zerodim_and_defer from pandas.core.ops.invalid import invalid_comparison, make_invalid_op @@ -623,7 +624,7 @@ def astype(self, dtype, copy=True): dtype = pandas_dtype(dtype) if is_object_dtype(dtype): - return self._box_values(self.asi8) + return self._box_values(self.asi8.ravel()).reshape(self.shape) elif is_string_dtype(dtype) and not is_categorical_dtype(dtype): return 
self._format_native_types() elif is_integer_dtype(dtype): @@ -1256,19 +1257,13 @@ def _addsub_object_array(self, other: np.ndarray, op): PerformanceWarning, ) - # For EA self.astype('O') returns a numpy array, not an Index - left = self.astype("O") + # Caller is responsible for broadcasting if necessary + assert self.shape == other.shape, (self.shape, other.shape) - res_values = op(left, np.array(other)) - kwargs = {} - if not is_period_dtype(self): - kwargs["freq"] = "infer" - try: - res = type(self)._from_sequence(res_values, **kwargs) - except ValueError: - # e.g. we've passed a Timestamp to TimedeltaArray - res = res_values - return res + res_values = op(self.astype("O"), np.array(other)) + result = array(res_values.ravel()) + result = extract_array(result, extract_numpy=True).reshape(self.shape) + return result def _time_shift(self, periods, freq=None): """ diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index d3f9ac4f3f8b2..f7211ab5f9fd4 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -27,6 +27,7 @@ date_range, ) import pandas._testing as tm +from pandas.core.arrays import DatetimeArray, TimedeltaArray from pandas.core.ops import roperator from pandas.tests.arithmetic.common import ( assert_invalid_addsub_type, @@ -956,6 +957,18 @@ def test_dt64arr_sub_NaT(self, box_with_array): # ------------------------------------------------------------- # Subtraction of datetime-like array-like + def test_dt64arr_sub_dt64object_array(self, box_with_array, tz_naive_fixture): + dti = pd.date_range("2016-01-01", periods=3, tz=tz_naive_fixture) + expected = dti - dti + + obj = tm.box_expected(dti, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + warn = PerformanceWarning if box_with_array is not pd.DataFrame else None + with tm.assert_produces_warning(warn): + result = obj - obj.astype(object) + tm.assert_equal(result, expected) + def test_dt64arr_naive_sub_dt64ndarray(self, box_with_array): dti = pd.date_range("2016-01-01", periods=3, tz=None) dt64vals = dti.values @@ -2395,3 +2408,31 @@ def test_shift_months(years, months): raw = [x + pd.offsets.DateOffset(years=years, months=months) for x in dti] expected = DatetimeIndex(raw) tm.assert_index_equal(actual, expected) + + +def test_dt64arr_addsub_object_dtype_2d(): + # block-wise DataFrame operations will require operating on 2D + # DatetimeArray/TimedeltaArray, so check that specifically. 
+ dti = pd.date_range("1994-02-13", freq="2W", periods=4) + dta = dti._data.reshape((4, 1)) + + other = np.array([[pd.offsets.Day(n)] for n in range(4)]) + assert other.shape == dta.shape + + with tm.assert_produces_warning(PerformanceWarning): + result = dta + other + with tm.assert_produces_warning(PerformanceWarning): + expected = (dta[:, 0] + other[:, 0]).reshape(-1, 1) + + assert isinstance(result, DatetimeArray) + assert result.freq is None + tm.assert_numpy_array_equal(result._data, expected._data) + + with tm.assert_produces_warning(PerformanceWarning): + # Case where we expect to get a TimedeltaArray back + result2 = dta - dta.astype(object) + + assert isinstance(result2, TimedeltaArray) + assert result2.shape == (4, 1) + assert result2.freq is None + assert (result2.asi8 == 0).all() diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 300e468c34e65..b11fcfd20b8c4 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -532,6 +532,20 @@ def test_tda_add_sub_index(self): expected = tdi - tdi tm.assert_index_equal(result, expected) + def test_tda_add_dt64_object_array(self, box_df_fail, tz_naive_fixture): + # Result should be cast back to DatetimeArray + dti = pd.date_range("2016-01-01", periods=3, tz=tz_naive_fixture) + dti._set_freq(None) + tdi = dti - dti + + obj = tm.box_expected(tdi, box_df_fail) + other = tm.box_expected(dti, box_df_fail) + + warn = PerformanceWarning if box_df_fail is not pd.DataFrame else None + with tm.assert_produces_warning(warn): + result = obj + other.astype(object) + tm.assert_equal(result, other) + # ------------------------------------------------------------- # Binary operations TimedeltaIndex and timedelta-like From f2a13253577efde3f4e9dfd5148c7fe9c99f1568 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Mar 2020 17:56:50 -0800 Subject: [PATCH 217/388] TST: broken off from #32187 (#32258) Co-authored-by: Simon Hawkins --- pandas/tests/indexing/test_floats.py | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index fff5ca03e80f4..18b9898e7d800 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -40,20 +40,7 @@ def check(self, result, original, indexer, getitem): tm.assert_almost_equal(result, expected) - @pytest.mark.parametrize( - "index_func", - [ - tm.makeStringIndex, - tm.makeUnicodeIndex, - tm.makeCategoricalIndex, - tm.makeDateIndex, - tm.makeTimedeltaIndex, - tm.makePeriodIndex, - tm.makeIntIndex, - tm.makeRangeIndex, - ], - ) - def test_scalar_error(self, index_func): + def test_scalar_error(self, series_with_simple_index): # GH 4892 # float_indexers should raise exceptions @@ -62,11 +49,9 @@ def test_scalar_error(self, index_func): # but is specifically testing for the error # message - i = index_func(5) + s = series_with_simple_index - s = Series(np.arange(len(i)), index=i) - - msg = "Cannot index by location index" + msg = "Cannot index by location index with a non-integer key" with pytest.raises(TypeError, match=msg): s.iloc[3.0] From d0126236c8e75cec7560cd913f8a0225f3c89886 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Mar 2020 17:58:26 -0800 Subject: [PATCH 218/388] REF: simplify PeriodIndex._shallow_copy (#32280) --- pandas/core/indexes/base.py | 3 +++ pandas/core/indexes/datetimelike.py | 3 ++- pandas/core/indexes/period.py | 16 +++------------- 
From d0126236c8e75cec7560cd913f8a0225f3c89886 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Mon, 2 Mar 2020 17:58:26 -0800
Subject: [PATCH 218/388] REF: simplify PeriodIndex._shallow_copy (#32280)

---
 pandas/core/indexes/base.py                      |  3 +++
 pandas/core/indexes/datetimelike.py              |  3 ++-
 pandas/core/indexes/period.py                    | 16 +++-------------
 pandas/tests/base/test_ops.py                    |  5 +++++
 pandas/tests/indexes/datetimes/test_indexing.py  |  8 ++++++++
 pandas/tests/indexes/period/test_period.py       | 12 ++++++++----
 pandas/tests/indexes/test_common.py              |  7 +++++--
 pandas/tests/indexes/timedeltas/test_indexing.py |  8 ++++++++
 8 files changed, 42 insertions(+), 20 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index ae2387f0fd7b4..ceb3f26a0526a 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -4236,6 +4236,9 @@ def putmask(self, mask, value):
         values = self.values.copy()
         try:
             np.putmask(values, mask, self._convert_for_op(value))
+            if is_period_dtype(self.dtype):
+                # .values cast to object, so we need to cast back
+                values = type(self)(values)._data
             return self._shallow_copy(values)
         except (ValueError, TypeError) as err:
             if is_object_dtype(self):
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index 654d423b37c81..d1e21a2fe7657 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -520,7 +520,8 @@ def where(self, cond, other=None):
             other = other.view("i8")

         result = np.where(cond, values, other).astype("i8")
-        return self._shallow_copy(result)
+        arr = type(self._data)._simple_new(result, dtype=self.dtype)
+        return type(self)._simple_new(arr, name=self.name)

     def _summary(self, name=None) -> str:
         """
diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
index 9eeb41f735015..ebf69c49c029a 100644
--- a/pandas/core/indexes/period.py
+++ b/pandas/core/indexes/period.py
@@ -250,22 +250,11 @@ def _has_complex_internals(self):
         return True

     def _shallow_copy(self, values=None, name: Label = no_default):
-        # TODO: simplify, figure out type of values
         name = name if name is not no_default else self.name

         if values is None:
             values = self._data

-        if isinstance(values, type(self)):
-            values = values._data
-
-        if not isinstance(values, PeriodArray):
-            if isinstance(values, np.ndarray) and values.dtype == "i8":
-                values = PeriodArray(values, freq=self.freq)
-            else:
-                # GH#30713 this should never be reached
-                raise TypeError(type(values), getattr(values, "dtype", None))
-
         return self._simple_new(values, name=name)

     def _maybe_convert_timedelta(self, other):
@@ -618,10 +607,11 @@ def insert(self, loc, item):
         if not isinstance(item, Period) or self.freq != item.freq:
             return self.astype(object).insert(loc, item)

-        idx = np.concatenate(
+        i8result = np.concatenate(
             (self[:loc].asi8, np.array([item.ordinal]), self[loc:].asi8)
         )
-        return self._shallow_copy(idx)
+        arr = type(self._data)._simple_new(i8result, dtype=self.dtype)
+        return type(self)._simple_new(arr, name=self.name)

     def join(self, other, how="left", level=None, return_indexers=False, sort=False):
         """
diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py
index 06ba6cc34ad92..ff1d2e1c76214 100644
--- a/pandas/tests/base/test_ops.py
+++ b/pandas/tests/base/test_ops.py
@@ -14,6 +14,7 @@
     is_datetime64_dtype,
     is_datetime64tz_dtype,
     is_object_dtype,
+    is_period_dtype,
     needs_i8_conversion,
 )
@@ -295,6 +296,10 @@ def test_value_counts_unique_nunique_null(self, null_obj, index_or_series_obj):
             obj[0:2] = pd.NaT
             values = obj._values

+        elif is_period_dtype(obj):
+            values[0:2] = iNaT
+            parr = type(obj._data)(values, dtype=obj.dtype)
+            values = obj._shallow_copy(parr)
         elif needs_i8_conversion(obj):
             values[0:2] = iNaT
             values = obj._shallow_copy(values)
diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py
index ceab670fb5041..554ae76979ba8 100644
--- a/pandas/tests/indexes/datetimes/test_indexing.py
+++ b/pandas/tests/indexes/datetimes/test_indexing.py
@@ -121,6 +121,14 @@ def test_dti_custom_getitem_matplotlib_hackaround(self):


 class TestWhere:
+    def test_where_doesnt_retain_freq(self):
+        dti = date_range("20130101", periods=3, freq="D", name="idx")
+        cond = [True, True, False]
+        expected = DatetimeIndex([dti[0], dti[1], dti[0]], freq=None, name="idx")
+
+        result = dti.where(cond, dti[::-1])
+        tm.assert_index_equal(result, expected)
+
     def test_where_other(self):
         # other is ndarray or Index
         i = pd.date_range("20130101", periods=3, tz="US/Eastern")
diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py
index 40c7ffba46450..ab3e967f12360 100644
--- a/pandas/tests/indexes/period/test_period.py
+++ b/pandas/tests/indexes/period/test_period.py
@@ -117,7 +117,6 @@ def test_make_time_series(self):
         assert isinstance(series, Series)

     def test_shallow_copy_empty(self):
-        # GH13067
         idx = PeriodIndex([], freq="M")
         result = idx._shallow_copy()
@@ -125,11 +124,16 @@ def test_shallow_copy_empty(self):

         tm.assert_index_equal(result, expected)

-    def test_shallow_copy_i8(self):
+    def test_shallow_copy_disallow_i8(self):
         # GH-24391
         pi = period_range("2018-01-01", periods=3, freq="2D")
-        result = pi._shallow_copy(pi.asi8)
-        tm.assert_index_equal(result, pi)
+        with pytest.raises(AssertionError, match="ndarray"):
+            pi._shallow_copy(pi.asi8)
+
+    def test_shallow_copy_requires_disallow_period_index(self):
+        pi = period_range("2018-01-01", periods=3, freq="2D")
+        with pytest.raises(AssertionError, match="PeriodIndex"):
+            pi._shallow_copy(pi)

     def test_view_asi8(self):
         idx = PeriodIndex([], freq="M")
diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py
index b46e6514b4536..c6ba5c9d61e9e 100644
--- a/pandas/tests/indexes/test_common.py
+++ b/pandas/tests/indexes/test_common.py
@@ -10,7 +10,7 @@

 from pandas._libs.tslibs import iNaT

-from pandas.core.dtypes.common import needs_i8_conversion
+from pandas.core.dtypes.common import is_period_dtype, needs_i8_conversion

 import pandas as pd
 from pandas import CategoricalIndex, MultiIndex, RangeIndex
@@ -219,7 +219,10 @@ def test_get_unique_index(self, indices):
         if not indices._can_hold_na:
             pytest.skip("Skip na-check if index cannot hold na")

-        if needs_i8_conversion(indices):
+        if is_period_dtype(indices):
+            vals = indices[[0] * 5]._data
+            vals[0] = pd.NaT
+        elif needs_i8_conversion(indices):
             vals = indices.asi8[[0] * 5]
             vals[0] = iNaT
         else:
diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py
index 14fff6f9c85b5..5dec799832291 100644
--- a/pandas/tests/indexes/timedeltas/test_indexing.py
+++ b/pandas/tests/indexes/timedeltas/test_indexing.py
@@ -66,6 +66,14 @@ def test_timestamp_invalid_key(self, key):


 class TestWhere:
+    def test_where_doesnt_retain_freq(self):
+        tdi = timedelta_range("1 day", periods=3, freq="D", name="idx")
+        cond = [True, True, False]
+        expected = TimedeltaIndex([tdi[0], tdi[1], tdi[0]], freq=None, name="idx")
+
+        result = tdi.where(cond, tdi[::-1])
+        tm.assert_index_equal(result, expected)
+
     def test_where_invalid_dtypes(self):
         tdi = timedelta_range("1 day", periods=3, freq="D", name="idx")
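Note: the new ``test_where_doesnt_retain_freq`` tests above capture the
user-visible side of this refactor: once ``where`` mixes values from two
sources, the result can no longer guarantee a regular frequency, so ``freq``
is dropped. A short sketch under that assumption:

    import pandas as pd

    dti = pd.date_range("2013-01-01", periods=3, freq="D", name="idx")
    result = dti.where([True, True, False], dti[::-1])
    assert result.freq is None  # the "D" freq is deliberately not retained
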
From 604beff82f7d49aa2f681d9da0052548d29b5d84 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Mon, 2 Mar 2020 18:02:43 -0800
Subject: [PATCH 219/388] CLN: setitem_with_indexer cleanups (#32341)

---
 pandas/core/indexing.py | 58 ++++++++++++-----------------------------
 1 file changed, 17 insertions(+), 41 deletions(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 3ab180bafd156..35e61ab6a59c9 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1633,14 +1633,12 @@ def _setitem_with_indexer(self, indexer, value):
                 info_idx = [info_idx]
             labels = item_labels[info_idx]

+        plane_indexer = indexer[:1]
+        lplane_indexer = length_of_indexer(plane_indexer[0], self.obj.index)
+        # lplane_indexer gives the expected length of obj[indexer[0]]
+
         if len(labels) == 1:
             # We can operate on a single column
-            item = labels[0]
-            idx = indexer[0]
-
-            plane_indexer = tuple([idx])
-            lplane_indexer = length_of_indexer(plane_indexer[0], self.obj.index)
-            # lplane_indexer gives the expected length of obj[idx]

             # require that we are setting the right number of values that
             # we are indexing
@@ -1652,11 +1650,6 @@ def _setitem_with_indexer(self, indexer, value):
                     "length than the value"
                 )

-        # non-mi
-        else:
-            plane_indexer = indexer[:1]
-            lplane_indexer = length_of_indexer(plane_indexer[0], self.obj.index)
-
         def setter(item, v):
             ser = self.obj[item]
             pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer
@@ -1718,18 +1711,23 @@ def setter(item, v):

                     for i, item in enumerate(labels):

-                        # setting with a list, recoerces
+                        # setting with a list, re-coerces
                         setter(item, value[:, i].tolist())

-                # we have an equal len list/ndarray
-                elif _can_do_equal_len(
-                    labels, value, plane_indexer, lplane_indexer, self.obj
+                elif (
+                    len(labels) == 1
+                    and lplane_indexer == len(value)
+                    and not is_scalar(plane_indexer[0])
                 ):
+                    # we have an equal len list/ndarray
                     setter(labels[0], value)

-                # per label values
-                else:
+                elif lplane_indexer == 0 and len(value) == len(self.obj.index):
+                    # We get here in one case via .loc with a all-False mask
+                    pass

+                else:
+                    # per-label values
                     if len(labels) != len(value):
                         raise ValueError(
                             "Must have equal len keys and value "
@@ -1746,7 +1744,6 @@ def setter(item, v):
         else:

             if isinstance(indexer, tuple):
-                indexer = maybe_convert_ix(*indexer)

                 # if we are setting on the info axis ONLY
                 # set using those methods to avoid block-splitting
@@ -1764,6 +1761,8 @@ def setter(item, v):
                         self.obj[item_labels[indexer[info_axis]]] = value
                         return

+                indexer = maybe_convert_ix(*indexer)
+
             if isinstance(value, (ABCSeries, dict)):
                 # TODO(EA): ExtensionBlock.setitem this causes issues with
                 # setting for extensionarrays that store dicts. Need to decide
@@ -2277,26 +2276,3 @@ def _maybe_numeric_slice(df, slice_, include_bool=False):
             dtypes.append(bool)
         slice_ = IndexSlice[:, df.select_dtypes(include=dtypes).columns]
     return slice_
-
-
-def _can_do_equal_len(labels, value, plane_indexer, lplane_indexer, obj) -> bool:
-    """
-    Returns
-    -------
-    bool
-        True if we have an equal len settable.
-    """
-    if not len(labels) == 1 or not np.iterable(value) or is_scalar(plane_indexer[0]):
-        return False
-
-    item = labels[0]
-    index = obj[item].index
-
-    values_len = len(value)
-    # equal len list/ndarray
-    if len(index) == values_len:
-        return True
-    elif lplane_indexer == values_len:
-        return True
-
-    return False

From 4ba48f0005448ee8a5c0a3e964afcda2f111f627 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Mon, 2 Mar 2020 18:05:28 -0800
Subject: [PATCH 220/388] BUG: None / Timedelta incorrectly returning NaT (#32340)

---
 doc/source/whatsnew/v1.1.0.rst                | 1 +
 pandas/_libs/tslibs/timedeltas.pyx            | 10 ++++-
 .../tests/scalar/timedelta/test_arithmetic.py | 40 +++++++++++++++++++
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 999dee83cf7e0..60c2ba285bd25 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -214,6 +214,7 @@ Timedelta
 ^^^^^^^^^

 - Bug in constructing a :class:`Timedelta` with a high precision integer that would round the :class:`Timedelta` components (:issue:`31354`)
+- Bug in dividing ``np.nan`` or ``None`` by :class:`Timedelta` incorrectly returning ``NaT`` (:issue:`31869`)
 -

 Timezones
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
index 66660c5f641fd..298028227e18b 100644
--- a/pandas/_libs/tslibs/timedeltas.pyx
+++ b/pandas/_libs/tslibs/timedeltas.pyx
@@ -1407,7 +1407,14 @@ class Timedelta(_Timedelta):
             # convert to Timedelta below
             pass

+        elif util.is_nan(other):
+            # i.e. np.nan or np.float64("NaN")
+            raise TypeError("Cannot divide float by Timedelta")
+
         elif hasattr(other, 'dtype'):
+            if other.dtype.kind == "O":
+                # GH#31869
+                return np.array([x / self for x in other])
             return other / self.to_timedelta64()

         elif not _validate_ops_compat(other):
@@ -1415,7 +1422,8 @@ class Timedelta(_Timedelta):
             other = Timedelta(other)

         if other is NaT:
-            return NaT
+            # In this context we treat NaT as timedelta-like
+            return np.nan

         return float(other.value) / self.value

     def __floordiv__(self, other):
diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py
index 230a14aeec60a..ea02a76275443 100644
--- a/pandas/tests/scalar/timedelta/test_arithmetic.py
+++ b/pandas/tests/scalar/timedelta/test_arithmetic.py
@@ -412,6 +412,46 @@ def test_td_rdiv_timedeltalike_scalar(self):

         assert np.timedelta64(60, "h") / td == 0.25

+    def test_td_rdiv_na_scalar(self):
+        # GH#31869 None gets cast to NaT
+        td = Timedelta(10, unit="d")
+
+        result = NaT / td
+        assert np.isnan(result)
+
+        result = None / td
+        assert np.isnan(result)
+
+        result = np.timedelta64("NaT") / td
+        assert np.isnan(result)
+
+        with pytest.raises(TypeError, match="cannot use operands with types dtype"):
+            np.datetime64("NaT") / td
+
+        with pytest.raises(TypeError, match="Cannot divide float by Timedelta"):
+            np.nan / td
+
+    def test_td_rdiv_ndarray(self):
+        td = Timedelta(10, unit="d")
+
+        arr = np.array([td], dtype=object)
+        result = arr / td
+        expected = np.array([1], dtype=np.float64)
+        tm.assert_numpy_array_equal(result, expected)
+
+        arr = np.array([None])
+        result = arr / td
+        expected = np.array([np.nan])
+        tm.assert_numpy_array_equal(result, expected)
+
+        arr = np.array([np.nan], dtype=object)
+        with pytest.raises(TypeError, match="Cannot divide float by Timedelta"):
+            arr / td
+
+        arr = np.array([np.nan], dtype=np.float64)
+        with pytest.raises(TypeError, match="cannot use operands with types dtype"):
+            arr / td
+
     # ---------------------------------------------------------------
     # Timedelta.__floordiv__
From 116f8d2d3a18da9ecd254d753eca963e18581d96 Mon Sep 17 00:00:00 2001
From: Martin Winkel
Date: Tue, 3 Mar 2020 03:48:37 +0100
Subject: [PATCH 221/388] TST: Using more fixtures in tests/base/test_ops.py
 (#32313)

---
 pandas/conftest.py                   |  10 +++
 pandas/tests/base/test_ops.py        | 116 ++++++++++++++++-----------
 pandas/tests/series/test_validate.py |  10 ++-
 3 files changed, 88 insertions(+), 48 deletions(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index d6b9576d7729f..dcfc523315c8b 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1047,6 +1047,16 @@ def series_with_simple_index(indices):
     for dtype in _narrow_dtypes
 }

+
+@pytest.fixture(params=_narrow_series.keys())
+def narrow_series(request):
+    """
+    Fixture for Series with low precision data types
+    """
+    # copy to avoid mutation, e.g. setting .name
+    return _narrow_series[request.param].copy()
+
+
 _index_or_series_objs = {**indices_dict, **_series, **_narrow_series}

diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py
index ff1d2e1c76214..39dca1a9742df 100644
--- a/pandas/tests/base/test_ops.py
+++ b/pandas/tests/base/test_ops.py
@@ -11,6 +11,7 @@
 from pandas.compat.numpy import np_array_datetime64_compat

 from pandas.core.dtypes.common import (
+    is_categorical_dtype,
     is_datetime64_dtype,
     is_datetime64tz_dtype,
     is_object_dtype,
@@ -802,62 +803,83 @@ def test_fillna(self):
             assert o is not result

     @pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
-    def test_memory_usage(self):
-        for o in self.objs:
-            res = o.memory_usage()
-            res_deep = o.memory_usage(deep=True)
-
-            if is_object_dtype(o) or (
-                isinstance(o, Series) and is_object_dtype(o.index)
-            ):
-                # if there are objects, only deep will pick them up
-                assert res_deep > res
-            else:
-                assert res == res_deep
-
-            if isinstance(o, Series):
-                assert (
-                    o.memory_usage(index=False) + o.index.memory_usage()
-                ) == o.memory_usage(index=True)
+    def test_memory_usage(self, index_or_series_obj):
+        obj = index_or_series_obj
+        res = obj.memory_usage()
+        res_deep = obj.memory_usage(deep=True)

-            # sys.getsizeof will call the .memory_usage with
-            # deep=True, and add on some GC overhead
-            diff = res_deep - sys.getsizeof(o)
-            assert abs(diff) < 100
+        is_object = is_object_dtype(obj) or (
+            isinstance(obj, Series) and is_object_dtype(obj.index)
+        )
+        is_categorical = is_categorical_dtype(obj) or (
+            isinstance(obj, Series) and is_categorical_dtype(obj.index)
+        )

-    def test_searchsorted(self):
-        # See gh-12238
-        for o in self.objs:
-            index = np.searchsorted(o, max(o))
-            assert 0 <= index <= len(o)
+        if len(obj) == 0:
+            assert res_deep == res == 0
+        elif is_object or is_categorical:
+            # only deep will pick them up
+            assert res_deep > res
+        else:
+            assert res == res_deep

-            index = np.searchsorted(o, max(o), sorter=range(len(o)))
-            assert 0 <= index <= len(o)
+        # sys.getsizeof will call the .memory_usage with
+        # deep=True, and add on some GC overhead
+        diff = res_deep - sys.getsizeof(obj)
+        assert abs(diff) < 100

-    def test_validate_bool_args(self):
-        invalid_values = [1, "True", [1, 2, 3], 5.0]
+    def test_memory_usage_components_series(self, series_with_simple_index):
+        series = series_with_simple_index
+        total_usage = series.memory_usage(index=True)
+        non_index_usage = series.memory_usage(index=False)
+        index_usage = series.index.memory_usage()
+        assert total_usage == non_index_usage + index_usage
+
+    def test_memory_usage_components_narrow_series(self, narrow_series):
+        series = narrow_series
+        total_usage = series.memory_usage(index=True)
+        non_index_usage = series.memory_usage(index=False)
+        index_usage = series.index.memory_usage()
+        assert total_usage == non_index_usage + index_usage
+
+    def test_searchsorted(self, index_or_series_obj):
+        # numpy.searchsorted calls obj.searchsorted under the hood.
+        # See gh-12238
+        obj = index_or_series_obj

-        for value in invalid_values:
-            msg = "expected type bool"
-            with pytest.raises(ValueError, match=msg):
-                self.int_series.drop_duplicates(inplace=value)
+        if isinstance(obj, pd.MultiIndex):
+            # See gh-14833
+            pytest.skip("np.searchsorted doesn't work on pd.MultiIndex")

-    def test_getitem(self):
-        for i in self.indexes:
-            s = pd.Series(i)
+        max_obj = max(obj, default=0)
+        index = np.searchsorted(obj, max_obj)
+        assert 0 <= index <= len(obj)

-            assert i[0] == s.iloc[0]
-            assert i[5] == s.iloc[5]
-            assert i[-1] == s.iloc[-1]
+        index = np.searchsorted(obj, max_obj, sorter=range(len(obj)))
+        assert 0 <= index <= len(obj)

-            assert i[-1] == i[9]
+    def test_access_by_position(self, indices):
+        index = indices

-            msg = "index 20 is out of bounds for axis 0 with size 10"
-            with pytest.raises(IndexError, match=msg):
-                i[20]
-            msg = "single positional indexer is out-of-bounds"
-            with pytest.raises(IndexError, match=msg):
-                s.iloc[20]
+        if len(index) == 0:
+            pytest.skip("Test doesn't make sense on empty data")
+        elif isinstance(index, pd.MultiIndex):
+            pytest.skip("Can't instantiate Series from MultiIndex")
+
+        series = pd.Series(index)
+        assert index[0] == series.iloc[0]
+        assert index[5] == series.iloc[5]
+        assert index[-1] == series.iloc[-1]
+
+        size = len(index)
+        assert index[-1] == index[size - 1]
+
+        msg = f"index {size} is out of bounds for axis 0 with size {size}"
+        with pytest.raises(IndexError, match=msg):
+            index[size]
+        msg = "single positional indexer is out-of-bounds"
+        with pytest.raises(IndexError, match=msg):
+            series.iloc[size]

     @pytest.mark.parametrize("indexer_klass", [list, pd.Index])
     @pytest.mark.parametrize(
diff --git a/pandas/tests/series/test_validate.py b/pandas/tests/series/test_validate.py
index 511d24ca7fa29..e2f050650b298 100644
--- a/pandas/tests/series/test_validate.py
+++ b/pandas/tests/series/test_validate.py
@@ -3,7 +3,15 @@

 @pytest.mark.parametrize(
     "func",
-    ["reset_index", "_set_name", "sort_values", "sort_index", "rename", "dropna"],
+    [
+        "reset_index",
+        "_set_name",
+        "sort_values",
+        "sort_index",
+        "rename",
+        "dropna",
+        "drop_duplicates",
+    ],
 )
 @pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0])
 def test_validate_bool_args(string_series, func, inplace):

From e2f1bc02ef75776eccf155922304a7c67fa12621 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Mon, 2 Mar 2020 18:58:52 -0800
Subject: [PATCH 222/388] CLN: remove unused values from interpolate call (#32400)

---
 pandas/core/generic.py          | 1 -
 pandas/core/internals/blocks.py | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index d8fa829eece93..ad92ce3c4a992 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6793,7 +6793,6 @@ def interpolate(
             method=method,
             axis=ax,
             index=index,
-            values=_maybe_transposed_self,
             limit=limit,
             limit_direction=limit_direction,
             limit_area=limit_area,
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 34fa4c0e6544e..67b734b8890f4 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -1107,7 +1107,6 @@ def interpolate(
         method="pad",
         axis=0,
         index=None,
-        values=None,
        inplace=False,
         limit=None,
         limit_direction="forward",
@@ -1157,7 +1156,6 @@ def check_int_bool(self, inplace):
             return self._interpolate(
                 method=m,
                 index=index,
-                values=values,
                 axis=axis,
                 limit=limit,
                 limit_direction=limit_direction,
@@ -1211,7 +1209,6 @@ def _interpolate(
         self,
         method=None,
         index=None,
-        values=None,
         fill_value=None,
         axis=0,
         limit=None,

From fcadff30da9feb3edb3acda662ff6143b7cb2d9f Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com>
Date: Tue, 3 Mar 2020 05:00:26 +0200
Subject: [PATCH 223/388] CLN: some code cleanups to pandas/_libs/missing.pyx
 (#32367)

---
 pandas/_libs/missing.pyx | 30 ++++++++++++------------------
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
index c54cb652d7b21..dacf454824190 100644
--- a/pandas/_libs/missing.pyx
+++ b/pandas/_libs/missing.pyx
@@ -10,10 +10,13 @@ cnp.import_array()

 cimport pandas._libs.util as util

-from pandas._libs.tslibs.np_datetime cimport (
-    get_timedelta64_value, get_datetime64_value)
+
+from pandas._libs.tslibs.np_datetime cimport get_datetime64_value, get_timedelta64_value
 from pandas._libs.tslibs.nattype cimport (
-    checknull_with_nat, c_NaT as NaT, is_null_datetimelike)
+    c_NaT as NaT,
+    checknull_with_nat,
+    is_null_datetimelike,
+)

 from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
 from pandas.compat import is_platform_32bit
@@ -44,7 +47,7 @@ cpdef bint checknull(object val):

     Returns
     -------
-    result : bool
+    bool

     Notes
     -----
@@ -223,7 +226,7 @@ def isnaobj2d_old(arr: ndarray) -> ndarray:

     Returns
     -------
-    result : ndarray (dtype=np.bool_)
+    ndarray (dtype=np.bool_)

     Notes
     -----
@@ -248,17 +251,11 @@ def isnaobj2d_old(arr: ndarray) -> ndarray:


 def isposinf_scalar(val: object) -> bool:
-    if util.is_float_object(val) and val == INF:
-        return True
-    else:
-        return False
+    return util.is_float_object(val) and val == INF


 def isneginf_scalar(val: object) -> bool:
-    if util.is_float_object(val) and val == NEGINF:
-        return True
-    else:
-        return False
+    return util.is_float_object(val) and val == NEGINF


 cdef inline bint is_null_datetime64(v):
@@ -426,7 +423,6 @@ class NAType(C_NAType):
                 return NA
             elif isinstance(other, np.ndarray):
                 return np.where(other == 1, other, NA)
-
         return NotImplemented

     # Logical ops using Kleene logic
@@ -436,8 +432,7 @@ class NAType(C_NAType):
             return False
         elif other is True or other is C_NA:
             return NA
-        else:
-            return NotImplemented
+        return NotImplemented

     __rand__ = __and__

@@ -446,8 +441,7 @@ class NAType(C_NAType):
             return True
         elif other is False or other is C_NA:
             return NA
-        else:
-            return NotImplemented
+        return NotImplemented

     __ror__ = __or__
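Note: the ``__and__``/``__or__`` bodies above are pandas' Kleene
(three-valued) logic for ``pd.NA``; the cleanup only restructures the
returns without changing semantics. A quick sketch of what they implement:

    import pandas as pd

    assert (pd.NA & False) is False  # False dominates regardless of NA
    assert (pd.NA | True) is True    # True dominates regardless of NA
    assert (pd.NA & True) is pd.NA   # unknown stays unknown
    assert (pd.NA | False) is pd.NA
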
From 861df91ae9c0742d0ee0108b36f54dfac6a4506e Mon Sep 17 00:00:00 2001
From: s-scherrer
Date: Tue, 3 Mar 2020 03:02:22 +0000
Subject: [PATCH 224/388] BUG: fixes bug when using sep=None and comment
 keyword for read_csv (#31667)

---
 doc/source/whatsnew/v1.1.0.rst                 |  2 ++
 pandas/io/parsers.py                           | 16 +++++++++-------
 .../tests/io/parser/test_python_parser_only.py | 18 ++++++++++++++++++
 3 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 60c2ba285bd25..1df0b9dc5554c 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -297,8 +297,10 @@ I/O
 - Bug in :meth:`DataFrame.to_parquet` overwriting pyarrow's default for ``coerce_timestamps``; following pyarrow's default allows writing nanosecond timestamps with ``version="2.0"`` (:issue:`31652`).
+- Bug in :meth:`read_csv` was raising `TypeError` when `sep=None` was used in combination with `comment` keyword (:issue:`31396`)
 - Bug in :class:`HDFStore` that caused it to set to ``int64`` the dtype of a ``datetime64`` column when reading a DataFrame in Python 3 from fixed format written in Python 2 (:issue:`31750`)
+

 Plotting
 ^^^^^^^^
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 9d1687e20a949..bc2fb9f0f41bc 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -2379,19 +2379,21 @@ class MyDialect(csv.Dialect):

         dia = MyDialect

-        sniff_sep = True
-
         if sep is not None:
-            sniff_sep = False
             dia.delimiter = sep
-
-        # attempt to sniff the delimiter
-        if sniff_sep:
+        else:
+            # attempt to sniff the delimiter from the first valid line,
+            # i.e. no comment line and not in skiprows
             line = f.readline()
-            while self.skipfunc(self.pos):
+            lines = self._check_comments([[line]])[0]
+            while self.skipfunc(self.pos) or not lines:
                 self.pos += 1
                 line = f.readline()
+                lines = self._check_comments([[line]])[0]

-            line = self._check_comments([line])[0]
+            # since `line` was a string, lines will be a list containing
+            # only a single string
+            line = lines[0]

             self.pos += 1
             self.line_pos += 1
diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index 7367b19b40dc3..4d933fa02d36f 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -66,6 +66,24 @@ def test_sniff_delimiter(python_parser_only, kwargs):
     tm.assert_frame_equal(result, expected)


+def test_sniff_delimiter_comment(python_parser_only):
+    data = """# comment line
+index|A|B|C
+# comment line
+foo|1|2|3 # ignore | this
+bar|4|5|6
+baz|7|8|9
+"""
+    parser = python_parser_only
+    result = parser.read_csv(StringIO(data), index_col=0, sep=None, comment="#")
+    expected = DataFrame(
+        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
+        columns=["A", "B", "C"],
+        index=Index(["foo", "bar", "baz"], name="index"),
+    )
+    tm.assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize("encoding", [None, "utf-8"])
 def test_sniff_delimiter_encoding(python_parser_only, encoding):
     parser = python_parser_only
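Note: the ``read_csv`` fix above makes delimiter sniffing (``sep=None``) skip
comment lines instead of raising. A sketch based on the new test (the data
string is simplified, and ``engine="python"`` is assumed since sniffing is a
python-engine feature):

    from io import StringIO
    import pandas as pd

    data = "# comment line\nindex|A|B|C\nfoo|1|2|3\nbar|4|5|6\n"
    df = pd.read_csv(
        StringIO(data), index_col=0, sep=None, comment="#", engine="python"
    )
    # "|" is now sniffed from the first non-comment, non-skipped line
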
From 94fdc303914d05b121127b6701b3c132b797386e Mon Sep 17 00:00:00 2001
From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com>
Date: Mon, 2 Mar 2020 21:09:20 -0600
Subject: [PATCH 225/388] Don't create _join_functions (#32336)

---
 pandas/core/reshape/merge.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index d4d9c26686891..c301d6e7c7155 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1312,7 +1312,12 @@ def _get_join_indexers(
     kwargs = copy.copy(kwargs)
     if how == "left":
         kwargs["sort"] = sort
-    join_func = _join_functions[how]
+    join_func = {
+        "inner": libjoin.inner_join,
+        "left": libjoin.left_outer_join,
+        "right": _right_outer_join,
+        "outer": libjoin.full_outer_join,
+    }[how]

     return join_func(lkey, rkey, count, **kwargs)

@@ -1842,14 +1847,6 @@ def _right_outer_join(x, y, max_groups):
     return left_indexer, right_indexer


-_join_functions = {
-    "inner": libjoin.inner_join,
-    "left": libjoin.left_outer_join,
-    "right": _right_outer_join,
-    "outer": libjoin.full_outer_join,
-}
-
-
 def _factorize_keys(lk, rk, sort=True):
     # Some pre-processing for non-ndarray lk / rk
     if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk):

From ebf96681eebd11030640f7bc4d8693b3919cf802 Mon Sep 17 00:00:00 2001
From: Ayla Khan <1399377+a-y-khan@users.noreply.github.com>
Date: Mon, 2 Mar 2020 20:24:22 -0700
Subject: [PATCH 226/388] API: replace() should raise an exception if invalid
 argument is given (#31946)

---
 doc/source/whatsnew/v1.1.0.rst              |  1 +
 pandas/core/generic.py                      | 15 +++++++++++++++
 pandas/tests/frame/methods/test_replace.py  | 11 +++++++++++
 pandas/tests/series/methods/test_replace.py | 11 +++++++++++
 4 files changed, 38 insertions(+)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 1df0b9dc5554c..bd2308854afa3 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -326,6 +326,7 @@ Reshaping
 - Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`)
 - :meth:`DataFrame.pivot` can now take lists for ``index`` and ``columns`` arguments (:issue:`21425`)
 - Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`)
+- :meth:`DataFrame.replace` and :meth:`Series.replace` will raise a ``TypeError`` if ``to_replace`` is not an expected type. Previously the ``replace`` would fail silently (:issue:`18634`)


 Sparse
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index ad92ce3c4a992..f7eb79a4f1c78 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6169,7 +6169,9 @@ def bfill(
         AssertionError
             * If `regex` is not a ``bool`` and `to_replace` is not
              ``None``.
+
         TypeError
+            * If `to_replace` is not a scalar, array-like, ``dict``, or ``None``
             * If `to_replace` is a ``dict`` and `value` is not a ``list``,
               ``dict``, ``ndarray``, or ``Series``
             * If `to_replace` is ``None`` and `regex` is not compilable
             * When replacing multiple ``bool`` or ``datetime64`` objects and
               the arguments to `to_replace` does not match the type of the
               value being replaced
+
         ValueError
             * If a ``list`` or an ``ndarray`` is passed to `to_replace` and
               `value` but they are not the same length.
@@ -6373,6 +6376,18 @@ def replace(
         regex=False,
         method="pad",
     ):
+        if not (
+            is_scalar(to_replace)
+            or isinstance(to_replace, pd.Series)
+            or is_re_compilable(to_replace)
+            or is_list_like(to_replace)
+        ):
+            raise TypeError(
+                "Expecting 'to_replace' to be either a scalar, array-like, "
+                "dict or None, got invalid type "
+                f"{repr(type(to_replace).__name__)}"
+            )
+
         inplace = validate_bool_kwarg(inplace, "inplace")
         if not is_bool(regex) and to_replace is not None:
             raise AssertionError("'to_replace' must be 'None' if 'regex' is not a bool")
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index 92b74c4409d7d..ee89562261b19 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -1363,3 +1363,14 @@ def test_replace_after_convert_dtypes(self):
         result = df.replace(1, 10)
         expected = pd.DataFrame({"grp": [10, 2, 3, 4, 5]}, dtype="Int64")
         tm.assert_frame_equal(result, expected)
+
+    def test_replace_invalid_to_replace(self):
+        # GH 18634
+        # API: replace() should raise an exception if invalid argument is given
+        df = pd.DataFrame({"one": ["a", "b ", "c"], "two": ["d ", "e ", "f "]})
+        msg = (
+            r"Expecting 'to_replace' to be either a scalar, array-like, "
+            r"dict or None, got invalid type.*"
+        )
+        with pytest.raises(TypeError, match=msg):
+            df.replace(lambda x: x.strip())
diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py
index 770ad38b0215e..26eaf53616282 100644
--- a/pandas/tests/series/methods/test_replace.py
+++ b/pandas/tests/series/methods/test_replace.py
@@ -362,3 +362,14 @@ def test_replace_no_cast(self, ser, exp):
         expected = pd.Series(exp)

         tm.assert_series_equal(result, expected)
+
+    def test_replace_invalid_to_replace(self):
+        # GH 18634
+        # API: replace() should raise an exception if invalid argument is given
+        series = pd.Series(["a", "b", "c "])
+        msg = (
+            r"Expecting 'to_replace' to be either a scalar, array-like, "
+            r"dict or None, got invalid type.*"
+        )
+        with pytest.raises(TypeError, match=msg):
+            series.replace(lambda x: x.strip())
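Note: with the type guard added above, an unexpected ``to_replace`` argument
such as a callable now fails loudly instead of silently doing nothing. A
minimal sketch of the new behavior:

    import pandas as pd

    df = pd.DataFrame({"one": ["a", "b ", "c"]})
    df.replace("b ", "b")  # still fine: scalar to_replace
    # df.replace(lambda x: x.strip()) raises:
    # TypeError: Expecting 'to_replace' to be either a scalar, array-like,
    # dict or None, got invalid type 'function'
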
From 821aa25c9039e72da9a7b236cf2f9e7d549cbb7b Mon Sep 17 00:00:00 2001
From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com>
Date: Mon, 2 Mar 2020 21:28:42 -0600
Subject: [PATCH 227/388] BUG: Fix __ne__ comparison for Categorical (#32304)

---
 doc/source/whatsnew/v1.1.0.rst             |  1 +
 pandas/core/arrays/categorical.py          |  5 ++++-
 pandas/tests/extension/test_categorical.py | 13 +++++++++++++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index bd2308854afa3..57c53f73962dc 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -199,6 +199,7 @@ Categorical

 - Bug where :func:`merge` was unable to join on non-unique categorical indices (:issue:`28189`)
 - Bug when passing categorical data to :class:`Index` constructor along with ``dtype=object`` incorrectly returning a :class:`CategoricalIndex` instead of object-dtype :class:`Index` (:issue:`32167`)
+- Bug where :class:`Categorical` comparison operator ``__ne__`` would incorrectly evaluate to ``False`` when either element was missing (:issue:`32276`)
 -

 Datetimelike
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 4167c75eb5782..40a169d03f39c 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -103,7 +103,10 @@ def func(self, other):
             mask = (self._codes == -1) | (other_codes == -1)
             if mask.any():
                 # In other series, this leads to False, so do that here too
-                ret[mask] = False
+                if opname == "__ne__":
+                    ret[(self._codes == -1) & (other_codes == -1)] = True
+                else:
+                    ret[mask] = False
             return ret

         if is_scalar(other):
diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py
index 69a97f5c9fe02..059d3453995bd 100644
--- a/pandas/tests/extension/test_categorical.py
+++ b/pandas/tests/extension/test_categorical.py
@@ -282,6 +282,19 @@ def _compare_other(self, s, data, op_name, other):
             with pytest.raises(TypeError, match=msg):
                 op(data, other)

+    @pytest.mark.parametrize(
+        "categories",
+        [["a", "b"], [0, 1], [pd.Timestamp("2019"), pd.Timestamp("2020")]],
+    )
+    def test_not_equal_with_na(self, categories):
+        # https://github.com/pandas-dev/pandas/issues/32276
+        c1 = Categorical.from_codes([-1, 0], categories=categories)
+        c2 = Categorical.from_codes([0, 1], categories=categories)
+
+        result = c1 != c2
+
+        assert result.all()
+

 class TestParsing(base.BaseParsingTests):
     pass
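Note: after the ``__ne__`` fix above, inequality evaluates True whenever
either element is missing, consistent with how the other comparison operators
treat missing codes. A sketch mirroring the new test:

    import pandas as pd
    from pandas import Categorical

    c1 = Categorical.from_codes([-1, 0], categories=["a", "b"])  # [NaN, "a"]
    c2 = Categorical.from_codes([0, 1], categories=["a", "b"])   # ["a", "b"]
    assert (c1 != c2).all()  # the NaN position previously compared as False
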
""" - blob: List[Tuple[str, Optional[Union[str, int]]]] = [] - - # get full commit hash - commit = _get_commit_hash() - - blob.append(("commit", commit)) - - try: - (sysname, nodename, release, version, machine, processor) = platform.uname() - blob.extend( - [ - ("python", ".".join(map(str, sys.version_info))), - ("python-bits", struct.calcsize("P") * 8), - ("OS", f"{sysname}"), - ("OS-release", f"{release}"), - # FIXME: dont leave commented-out - # ("Version", f"{version}"), - ("machine", f"{machine}"), - ("processor", f"{processor}"), - ("byteorder", f"{sys.byteorder}"), - ("LC_ALL", f"{os.environ.get('LC_ALL', 'None')}"), - ("LANG", f"{os.environ.get('LANG', 'None')}"), - ("LOCALE", ".".join(map(str, locale.getlocale()))), - ] - ) - except (KeyError, ValueError): - pass - - return blob - - -def show_versions(as_json=False): - sys_info = get_sys_info() deps = [ "pandas", # required @@ -86,39 +76,45 @@ def show_versions(as_json=False): "IPython", "pandas_datareader", ] - deps.extend(list(VERSIONS)) - deps_blob = [] + result: Dict[str, JSONSerializable] = {} for modname in deps: mod = import_optional_dependency( modname, raise_on_missing=False, on_version="ignore" ) - ver: Optional[str] - if mod: - ver = _get_version(mod) - else: - ver = None - deps_blob.append((modname, ver)) + result[modname] = _get_version(mod) if mod else None + return result + + +def show_versions(as_json: Union[str, bool] = False) -> None: + sys_info = _get_sys_info() + deps = _get_dependency_info() if as_json: - j = dict(system=dict(sys_info), dependencies=dict(deps_blob)) + j = dict(system=sys_info, dependencies=deps) if as_json is True: print(j) else: + assert isinstance(as_json, str) # needed for mypy with codecs.open(as_json, "wb", encoding="utf8") as f: json.dump(j, f, indent=2) else: + assert isinstance(sys_info["LOCALE"], dict) # needed for mypy + language_code = sys_info["LOCALE"]["language-code"] + encoding = sys_info["LOCALE"]["encoding"] + sys_info["LOCALE"] = f"{language_code}.{encoding}" + maxlen = max(len(x) for x in deps) print("\nINSTALLED VERSIONS") print("------------------") - for k, stat in sys_info: - print(f"{k:<{maxlen}}: {stat}") + for k, v in sys_info.items(): + print(f"{k:<{maxlen}}: {v}") print("") - for k, stat in deps_blob: - print(f"{k:<{maxlen}}: {stat}") + for k, v in deps.items(): + print(f"{k:<{maxlen}}: {v}") def main() -> int: From 4018550c796f9b23369267565b8ab8c87b9d14ad Mon Sep 17 00:00:00 2001 From: davidwales <55477181+davidwales@users.noreply.github.com> Date: Tue, 3 Mar 2020 14:49:53 +1100 Subject: [PATCH 229/388] Add missing newline (#32404) --- pandas/core/indexes/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ceb3f26a0526a..af6cbd0e95ec1 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4817,6 +4817,7 @@ def isin(self, values, level=None): Int64Index([1, 2, 3], dtype='int64') Check whether each index value in a list of values. 
From 4018550c796f9b23369267565b8ab8c87b9d14ad Mon Sep 17 00:00:00 2001
From: davidwales <55477181+davidwales@users.noreply.github.com>
Date: Tue, 3 Mar 2020 14:49:53 +1100
Subject: [PATCH 229/388] Add missing newline (#32404)

---
 pandas/core/indexes/base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index ceb3f26a0526a..af6cbd0e95ec1 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -4817,6 +4817,7 @@ def isin(self, values, level=None):
         Int64Index([1, 2, 3], dtype='int64')

         Check whether each index value in a list of values.
+
        >>> idx.isin([1, 4])
         array([ True, False, False])

From 0dc93cd712ae1a1f0e862331ec31791a451a422a Mon Sep 17 00:00:00 2001
From: ShilpaSugan <51447420+ShilpaSugan@users.noreply.github.com>
Date: Tue, 3 Mar 2020 12:31:30 +0000
Subject: [PATCH 230/388] TST: add message check to pytest.raises
 (tests/arrays/test_boolean.py) (#32397)

---
 pandas/tests/arrays/test_boolean.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py
index d14d6f3ff0c41..f4b466f4804c7 100644
--- a/pandas/tests/arrays/test_boolean.py
+++ b/pandas/tests/arrays/test_boolean.py
@@ -248,7 +248,11 @@ def test_coerce_to_numpy_array():
     tm.assert_numpy_array_equal(result, expected)

     # with missing values will raise error
     arr = pd.array([True, False, None], dtype="boolean")
-    with pytest.raises(ValueError):
+    msg = (
+        "cannot convert to 'bool'-dtype NumPy array with missing values. "
+        "Specify an appropriate 'na_value' for this dtype."
+    )
+    with pytest.raises(ValueError, match=msg):
         np.array(arr, dtype="bool")
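Note: the message asserted above comes from coercing a nullable boolean array
with missing values to a plain NumPy ``bool`` array. A sketch of the failing
path and the workaround the message itself suggests:

    import numpy as np
    import pandas as pd

    arr = pd.array([True, False, None], dtype="boolean")
    # np.array(arr, dtype="bool") raises ValueError: pd.NA has no bool encoding
    result = arr.to_numpy(dtype=bool, na_value=False)  # explicit NA fill works
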
From 283f81c28616cd7f11374da866a6d94d3630b150 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Tue, 3 Mar 2020 05:32:40 -0800
Subject: [PATCH 231/388] TYP: internals (#32403)

---
 pandas/core/internals/blocks.py   |  84 ++++++++++++++++---------
 pandas/core/internals/managers.py | 101 ++++++++++++++++++------------
 2 files changed, 114 insertions(+), 71 deletions(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 67b734b8890f4..70fd3ecdc2098 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -11,6 +11,7 @@
 import pandas._libs.internals as libinternals
 from pandas._libs.tslibs import Timedelta, conversion
 from pandas._libs.tslibs.timezones import tz_compare
+from pandas._typing import DtypeObj
 from pandas.util._validators import validate_bool_kwarg

 from pandas.core.dtypes.cast import (
@@ -170,20 +171,20 @@ def _consolidate_key(self):
         return (self._can_consolidate, self.dtype.name)

     @property
-    def _is_single_block(self):
+    def _is_single_block(self) -> bool:
         return self.ndim == 1

     @property
-    def is_view(self):
+    def is_view(self) -> bool:
         """ return a boolean if I am possibly a view """
         return self.values.base is not None

     @property
-    def is_datelike(self):
+    def is_datelike(self) -> bool:
         """ return True if I am a non-datelike """
         return self.is_datetime or self.is_timedelta

-    def is_categorical_astype(self, dtype):
+    def is_categorical_astype(self, dtype) -> bool:
         """
         validate that we have a astypeable to categorical,
         returns a boolean if we are a categorical
@@ -255,7 +256,7 @@ def mgr_locs(self, new_mgr_locs):
         self._mgr_locs = new_mgr_locs

     @property
-    def array_dtype(self):
+    def array_dtype(self) -> DtypeObj:
         """
         the dtype to return if I want to construct this block as an
         array
@@ -333,7 +334,7 @@ def dtype(self):
         return self.values.dtype

     @property
-    def ftype(self):
+    def ftype(self) -> str:
         if getattr(self.values, "_pandas_ftype", False):
             dtype = self.dtype.subtype
         else:
@@ -367,7 +368,7 @@ def set(self, locs, values):
         """
         self.values[locs] = values

-    def delete(self, loc):
+    def delete(self, loc) -> None:
         """
         Delete given loc(-s) from block in-place.
         """
@@ -401,7 +402,7 @@ def _split_op_result(self, result) -> List["Block"]:

         return [result]

-    def fillna(self, value, limit=None, inplace=False, downcast=None):
+    def fillna(self, value, limit=None, inplace: bool = False, downcast=None):
         """
         fillna on the block with the value. If we fail, then convert to
         ObjectBlock and try again
@@ -687,7 +688,7 @@ def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs):
         return values

     # block actions #
-    def copy(self, deep=True):
+    def copy(self, deep: bool = True):
         """ copy constructor """
         values = self.values
         if deep:
@@ -695,7 +696,13 @@ def copy(self, deep=True):
         return self.make_block_same_class(values, ndim=self.ndim)

     def replace(
-        self, to_replace, value, inplace=False, filter=None, regex=False, convert=True
+        self,
+        to_replace,
+        value,
+        inplace: bool = False,
+        filter=None,
+        regex: bool = False,
+        convert: bool = True,
     ):
         """
         replace the to_replace value with value, possible to create new
@@ -917,7 +924,15 @@ def setitem(self, indexer, value):
             block = self.make_block(values)
         return block

-    def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False):
+    def putmask(
+        self,
+        mask,
+        new,
+        align: bool = True,
+        inplace: bool = False,
+        axis: int = 0,
+        transpose: bool = False,
+    ):
         """
         putmask the data to the block; it is possible that we may create a
         new dtype of block
@@ -1261,7 +1276,7 @@ def func(x):
         blocks = [self.make_block_same_class(interp_values)]
         return self._maybe_downcast(blocks, downcast)

-    def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None):
+    def take_nd(self, indexer, axis: int, new_mgr_locs=None, fill_tuple=None):
         """
         Take values according to indexer and return them as a block.
@@ -1302,7 +1317,7 @@ def diff(self, n: int, axis: int = 1) -> List["Block"]:
         new_values = _block_shape(new_values, ndim=self.ndim)
         return [self.make_block(values=new_values)]

-    def shift(self, periods, axis=0, fill_value=None):
+    def shift(self, periods, axis: int = 0, fill_value=None):
         """ shift the block by periods, possibly upcast """
         # convert integer to float if necessary. need to do a lot more than
         # that, handle boolean etc also
@@ -1334,7 +1349,7 @@ def where(
         self,
         other,
         cond,
-        align=True,
+        align: bool = True,
         errors="raise",
         try_cast: bool = False,
         axis: int = 0,
@@ -1346,11 +1361,12 @@ def where(
         ----------
         other : a ndarray/object
         cond : the condition to respect
-        align : boolean, perform alignment on other/cond
+        align : bool, default True
+            Perform alignment on other/cond.
         errors : str, {'raise', 'ignore'}, default 'raise'
             - ``raise`` : allow exceptions to be raised
             - ``ignore`` : suppress exceptions. On error return original object
-        axis : int
+        axis : int, default 0

         Returns
         -------
@@ -1482,7 +1498,7 @@ def _unstack(self, unstacker_func, new_columns, n_rows, fill_value):
         blocks = [make_block(new_values, placement=new_placement)]
         return blocks, mask

-    def quantile(self, qs, interpolation="linear", axis=0):
+    def quantile(self, qs, interpolation="linear", axis: int = 0):
         """
         compute the quantiles of the
@@ -1539,7 +1555,13 @@ def quantile(self, qs, interpolation="linear", axis=0):
         return make_block(result, placement=np.arange(len(result)), ndim=ndim)

     def _replace_coerce(
-        self, to_replace, value, inplace=True, regex=False, convert=False, mask=None
+        self,
+        to_replace,
+        value,
+        inplace: bool = True,
+        regex: bool = False,
+        convert: bool = False,
+        mask=None,
     ):
         """
         Replace value corresponding to the given boolean array with another
@@ -1551,7 +1573,7 @@ def _replace_coerce(
             Scalar to replace or regular expression to match.
         value : object
             Replacement object.
-        inplace : bool, default False
+        inplace : bool, default True
             Perform inplace modification.
         regex : bool, default False
             If true, perform regular expression substitution.
@@ -1638,7 +1660,9 @@ def set(self, locs, values, check=False):
         assert locs.tolist() == [0]
         self.values = values

-    def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False):
+    def putmask(
+        self, mask, new, align=True, inplace=False, axis=0, transpose=False,
+    ):
         """
         putmask the data to the block; we must be a single block and not
         generate other blocks
@@ -1754,7 +1778,7 @@ def _can_hold_na(self):
         return self._holder._can_hold_na

     @property
-    def is_view(self):
+    def is_view(self) -> bool:
         """Extension arrays are never treated as views."""
         return False
@@ -1819,7 +1843,7 @@ def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs):
         # we are expected to return a 2-d ndarray
         return values.reshape(1, len(values))

-    def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
+    def take_nd(self, indexer, axis: int = 0, new_mgr_locs=None, fill_tuple=None):
         """
         Take values according to indexer and return them as a block.
         """
@@ -2080,7 +2104,7 @@ def to_native_types(
         )
         return formatter.get_result_as_array()

-    def should_store(self, value):
+    def should_store(self, value) -> bool:
         # when inserting a column should not coerce integers to floats
         # unnecessarily
         return issubclass(value.dtype.type, np.floating) and value.dtype == self.dtype
@@ -2098,7 +2122,7 @@ def _can_hold_element(self, element: Any) -> bool:
             element, (float, int, complex, np.float_, np.int_)
         ) and not isinstance(element, (bool, np.bool_))

-    def should_store(self, value):
+    def should_store(self, value) -> bool:
         return issubclass(value.dtype.type, np.complexfloating)
@@ -2117,7 +2141,7 @@ def _can_hold_element(self, element: Any) -> bool:
         )
         return is_integer(element)

-    def should_store(self, value):
+    def should_store(self, value) -> bool:
         return is_integer_dtype(value) and value.dtype == self.dtype
@@ -2255,7 +2279,7 @@ def to_native_types(
         ).reshape(i8values.shape)
         return np.atleast_2d(result)

-    def should_store(self, value):
+    def should_store(self, value) -> bool:
         return (
             issubclass(value.dtype.type, np.datetime64)
             and not is_datetime64tz_dtype(value)
@@ -2320,7 +2344,7 @@ def _maybe_coerce_values(self, values):
         return values

     @property
-    def is_view(self):
+    def is_view(self) -> bool:
         """ return a boolean if I am possibly a view """
         # check the ndarray values of the DatetimeIndex values
         return self.values._data.base is not None
@@ -2507,7 +2531,7 @@ def fillna(self, value, **kwargs):
             )
         return super().fillna(value, **kwargs)

-    def should_store(self, value):
+    def should_store(self, value) -> bool:
         return issubclass(
             value.dtype.type, np.timedelta64
         ) and not is_extension_array_dtype(value)
@@ -2553,7 +2577,7 @@ def _can_hold_element(self, element: Any) -> bool:
             return issubclass(tipo.type, np.bool_)
         return isinstance(element, (bool, np.bool_))

-    def should_store(self, value):
+    def should_store(self, value) -> bool:
         return issubclass(value.dtype.type, np.bool_) and not is_extension_array_dtype(
             value
         )
@@ -2645,7 +2669,7 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]
     def _can_hold_element(self, element: Any) -> bool:
         return True

-    def should_store(self, value):
+    def should_store(self, value) -> bool:
         return not (
             issubclass(
                 value.dtype.type,
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 9c90b20fc0f16..64896e2cac311 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -145,19 +145,20 @@ def __init__(

             self._rebuild_blknos_and_blklocs()

-    def make_empty(self, axes=None):
+    def make_empty(self, axes=None) -> "BlockManager":
         """ return an empty BlockManager with the items axis of len 0 """
         if axes is None:
-            axes = [ensure_index([])] + [ensure_index(a) for a in self.axes[1:]]
+            axes = [Index([])] + self.axes[1:]

         # preserve dtype if possible
         if self.ndim == 1:
+            assert isinstance(self, SingleBlockManager)  # for mypy
             blocks = np.array([], dtype=self.array_dtype)
         else:
             blocks = []
         return type(self)(blocks, axes)

-    def __nonzero__(self):
+    def __nonzero__(self) -> bool:
         return True

     # Python3 compat
@@ -171,7 +172,7 @@ def shape(self) -> Tuple[int, ...]:
     def ndim(self) -> int:
         return len(self.axes)

-    def set_axis(self, axis: int, new_labels: Index):
+    def set_axis(self, axis: int, new_labels: Index) -> None:
         # Caller is responsible for ensuring we have an Index object.
         old_len = len(self.axes[axis])
         new_len = len(new_labels)
@@ -214,7 +215,7 @@ def _is_single_block(self) -> bool:
             0, len(self), 1
         )

-    def _rebuild_blknos_and_blklocs(self):
+    def _rebuild_blknos_and_blklocs(self) -> None:
         """
         Update mgr._blknos / mgr._blklocs.
         """
@@ -288,7 +289,7 @@ def unpickle_block(values, mgr_locs):
         self._post_setstate()

-    def _post_setstate(self):
+    def _post_setstate(self) -> None:
         self._is_consolidated = False
         self._known_consolidated = False
         self._rebuild_blknos_and_blklocs()
@@ -308,7 +309,7 @@ def __repr__(self) -> str:
             output += f"\n{pprint_thing(block)}"
         return output

-    def _verify_integrity(self):
+    def _verify_integrity(self) -> None:
         mgr_shape = self.shape
         tot_items = sum(len(x.mgr_locs) for x in self.blocks)
         for block in self.blocks:
@@ -347,7 +348,7 @@ def reduce(self, func, *args, **kwargs):

         return res

-    def apply(self, f, filter=None, **kwargs):
+    def apply(self, f, filter=None, **kwargs) -> "BlockManager":
         """
         Iterate over the blocks, collect and create a new BlockManager.
@@ -430,13 +431,13 @@ def apply(self, f, filter=None, **kwargs):

     def quantile(
         self,
-        axis=0,
-        consolidate=True,
-        transposed=False,
+        axis: int = 0,
+        consolidate: bool = True,
+        transposed: bool = False,
         interpolation="linear",
         qs=None,
         numeric_only=None,
-    ):
+    ) -> "BlockManager":
         """
         Iterate over blocks applying quantile reduction.
         This routine is intended for reduction type operations and
@@ -455,7 +456,7 @@ def quantile(

         Returns
         -------
-        Block Manager (new object)
+        BlockManager
         """
         # Series dispatches to DataFrame for quantile, which allows us to
         # simplify some of the code here and in the blocks
@@ -533,44 +534,48 @@ def get_axe(block, qs, axes):
             fastpath=True,
         )

-    def isna(self, func):
+    def isna(self, func) -> "BlockManager":
         return self.apply("apply", func=func)

-    def where(self, **kwargs):
+    def where(self, **kwargs) -> "BlockManager":
         return self.apply("where", **kwargs)

-    def setitem(self, **kwargs):
+    def setitem(self, **kwargs) -> "BlockManager":
         return self.apply("setitem", **kwargs)

     def putmask(self, **kwargs):
         return self.apply("putmask", **kwargs)

-    def diff(self, **kwargs):
+    def diff(self, **kwargs) -> "BlockManager":
         return self.apply("diff", **kwargs)

-    def interpolate(self, **kwargs):
+    def interpolate(self, **kwargs) -> "BlockManager":
         return self.apply("interpolate", **kwargs)

-    def shift(self, **kwargs):
+    def shift(self, **kwargs) -> "BlockManager":
         return self.apply("shift", **kwargs)

-    def fillna(self, **kwargs):
+    def fillna(self, **kwargs) -> "BlockManager":
         return self.apply("fillna", **kwargs)

-    def downcast(self, **kwargs):
+    def downcast(self, **kwargs) -> "BlockManager":
         return self.apply("downcast", **kwargs)

-    def astype(self, dtype, copy: bool = False, errors: str = "raise"):
+    def astype(
+        self, dtype, copy: bool = False, errors: str = "raise"
+    ) -> "BlockManager":
         return self.apply("astype", dtype=dtype, copy=copy, errors=errors)

-    def convert(self, **kwargs):
+    def convert(self, **kwargs) -> "BlockManager":
         return self.apply("convert", **kwargs)

-    def replace(self, value, **kwargs):
+    def replace(self, value, **kwargs) -> "BlockManager":
         assert np.ndim(value) == 0, value
         return self.apply("replace", value=value, **kwargs)

-    def replace_list(self, src_list, dest_list, inplace=False, regex=False):
+    def replace_list(
+        self, src_list, dest_list, inplace: bool = False, regex: bool = False
+    ) -> "BlockManager":
         """ do a list replace """
         inplace = validate_bool_kwarg(inplace, "inplace")
@@ -602,7 +607,7 @@ def comp(s, regex=False):
             rb = [blk if inplace else blk.copy()]
             for i, (s, d) in enumerate(zip(src_list, dest_list)):
                 # TODO: assert/validate that `d` is always a scalar?
-                new_rb = []
+                new_rb: List[Block] = []
                 for b in rb:
                     m = masks[i][b.mgr_locs.indexer]
                     convert = i == src_len
@@ -633,7 +638,7 @@ def is_consolidated(self) -> bool:
             self._consolidate_check()
         return self._is_consolidated

-    def _consolidate_check(self):
+    def _consolidate_check(self) -> None:
         ftypes = [blk.ftype for blk in self.blocks]
         self._is_consolidated = len(ftypes) == len(set(ftypes))
         self._known_consolidated = True
@@ -670,7 +675,7 @@ def is_view(self) -> bool:

         return False

-    def get_bool_data(self, copy: bool = False):
+    def get_bool_data(self, copy: bool = False) -> "BlockManager":
         """
         Parameters
         ----------
@@ -680,7 +685,7 @@ def get_bool_data(self, copy: bool = False):
         self._consolidate_inplace()
         return self.combine([b for b in self.blocks if b.is_bool], copy)

-    def get_numeric_data(self, copy: bool = False):
+    def get_numeric_data(self, copy: bool = False) -> "BlockManager":
         """
         Parameters
         ----------
@@ -712,7 +717,7 @@ def combine(self, blocks: List[Block], copy: bool = True) -> "BlockManager":

         return type(self)(new_blocks, axes, do_integrity_check=False)

-    def get_slice(self, slobj: slice, axis: int = 0):
+    def get_slice(self, slobj: slice, axis: int = 0) -> "BlockManager":

         if axis == 0:
             new_blocks = self._slice_take_blocks_ax0(slobj)
@@ -737,7 +742,7 @@ def __contains__(self, item) -> bool:
     def nblocks(self) -> int:
         return len(self.blocks)

-    def copy(self, deep=True):
+    def copy(self, deep=True) -> "BlockManager":
         """
         Make deep or shallow copy of BlockManager
@@ -797,7 +802,7 @@ def as_array(self, transpose: bool = False) -> np.ndarray:

         return arr.transpose() if transpose else arr

-    def _interleave(self):
+    def _interleave(self) -> np.ndarray:
         """
         Return ndarray from blocks with specified item order
         Items must be contained in the blocks
@@ -807,7 +812,7 @@ def _interleave(self):
         # TODO: https://github.com/pandas-dev/pandas/issues/22791
         # Give EAs some input on what happens here. Sparse needs this.
         if is_sparse(dtype):
-            dtype = dtype.subtype
+            dtype = dtype.subtype  # type: ignore
         elif is_extension_array_dtype(dtype):
             dtype = "object"
@@ -906,7 +911,7 @@ def consolidate(self) -> "BlockManager":
         bm._consolidate_inplace()
         return bm

-    def _consolidate_inplace(self):
+    def _consolidate_inplace(self) -> None:
         if not self.is_consolidated():
             self.blocks = tuple(_consolidate(self.blocks))
             self._is_consolidated = True
@@ -1168,7 +1173,13 @@ def insert(self, loc: int, item, value, allow_duplicates: bool = False):
             self._consolidate_inplace()

     def reindex_axis(
-        self, new_index, axis, method=None, limit=None, fill_value=None, copy=True
+        self,
+        new_index,
+        axis: int,
+        method=None,
+        limit=None,
+        fill_value=None,
+        copy: bool = True,
     ):
         """
         Conform block manager to new index.
@@ -1183,7 +1194,13 @@ def reindex_axis(
         )

     def reindex_indexer(
-        self, new_axis, indexer, axis, fill_value=None, allow_dups=False, copy=True
+        self,
+        new_axis,
+        indexer,
+        axis: int,
+        fill_value=None,
+        allow_dups=False,
+        copy: bool = True,
     ):
         """
         Parameters
         ----------
         new_axis : Index
         indexer : ndarray of int64 or None
         axis : int
-        fill_value : object
-        allow_dups : bool
+        fill_value : object, default None
+        allow_dups : bool, default False
+        copy : bool, default True
+
         pandas-indexer with -1's only.
""" @@ -1329,7 +1348,7 @@ def _make_na_block(self, placement, fill_value=None): block_values.fill(fill_value) return make_block(block_values, placement=placement) - def take(self, indexer, axis=1, verify=True, convert=True): + def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True): """ Take items along any axis. """ @@ -1506,11 +1525,11 @@ def index(self) -> Index: return self.axes[0] @property - def dtype(self): + def dtype(self) -> DtypeObj: return self._block.dtype @property - def array_dtype(self): + def array_dtype(self) -> DtypeObj: return self._block.array_dtype def get_dtype_counts(self): From 706e6427bd1afd05c82eae810d288bbe22ed21c8 Mon Sep 17 00:00:00 2001 From: Adrian Mastronardi <7809331+AdrianMastronardi@users.noreply.github.com> Date: Tue, 3 Mar 2020 11:22:33 -0300 Subject: [PATCH 232/388] DOC: Fix SA04 errors in docstrings xref #28792 (#32180) --- pandas/core/algorithms.py | 6 +++--- pandas/core/frame.py | 17 +++++++++-------- pandas/core/series.py | 11 +++++++---- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 7201629cb086e..f9059054ba59f 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -313,8 +313,8 @@ def unique(values): See Also -------- - Index.unique - Series.unique + Index.unique : Return unique values from an Index. + Series.unique : Return unique values of Series object. Examples -------- @@ -1515,7 +1515,7 @@ def take(arr, indices, axis: int = 0, allow_fill: bool = False, fill_value=None) See Also -------- - numpy.take + numpy.take : Take elements from an array along an axis. Examples -------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bfb83b59497e6..106128004f549 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -358,9 +358,9 @@ class DataFrame(NDFrame): -------- DataFrame.from_records : Constructor from tuples, also record arrays. DataFrame.from_dict : From dicts of Series, arrays, or dicts. - read_csv - read_table - read_clipboard + read_csv : Read a comma-separated values (csv) file into DataFrame. + read_table : Read general delimited file into DataFrame. + read_clipboard : Read text from clipboard into DataFrame. Examples -------- @@ -7393,8 +7393,9 @@ def corr(self, method="pearson", min_periods=1) -> "DataFrame": See Also -------- - DataFrame.corrwith - Series.corr + DataFrame.corrwith : Compute pairwise correlation with another + DataFrame or Series. + Series.corr : Compute the correlation between two Series. Examples -------- @@ -7596,7 +7597,7 @@ def corrwith(self, other, axis=0, drop=False, method="pearson") -> Series: See Also -------- - DataFrame.corr + DataFrame.corr : Compute pairwise correlation of columns. """ axis = self._get_axis_number(axis) this = self._get_numeric_data() @@ -8001,7 +8002,7 @@ def idxmin(self, axis=0, skipna=True) -> Series: See Also -------- - Series.idxmin + Series.idxmin : Return index of the minimum element. Notes ----- @@ -8039,7 +8040,7 @@ def idxmax(self, axis=0, skipna=True) -> Series: See Also -------- - Series.idxmax + Series.idxmax : Return index of the maximum element. Notes ----- diff --git a/pandas/core/series.py b/pandas/core/series.py index d565cbbdd5344..19b247466e14d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -555,7 +555,7 @@ def ravel(self, order="C"): See Also -------- - numpy.ndarray.ravel + numpy.ndarray.ravel : Return a flattened array. 
""" return self._values.ravel(order=order) @@ -2079,6 +2079,9 @@ def round(self, decimals=0, *args, **kwargs) -> "Series": decimals : int, default 0 Number of decimal places to round to. If decimals is negative, it specifies the number of positions to the left of the decimal point. + *args, **kwargs + Additional arguments and keywords have no effect but might be + accepted for compatibility with NumPy. Returns ------- @@ -2133,8 +2136,8 @@ def quantile(self, q=0.5, interpolation="linear"): See Also -------- - core.window.Rolling.quantile - numpy.percentile + core.window.Rolling.quantile : Calculate the rolling quantile. + numpy.percentile : Returns the q-th percentile(s) of the array elements. Examples -------- @@ -3152,7 +3155,7 @@ def argsort(self, axis=0, kind="quicksort", order=None) -> "Series": See Also -------- - numpy.ndarray.argsort + numpy.ndarray.argsort : Returns the indices that would sort this array. """ values = self._values mask = isna(values) From d2064793087ccce7265829d9bb54ec6e0bbb0446 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Mar 2020 07:27:17 -0800 Subject: [PATCH 233/388] REF: remove _convert_scalar_indexer (#31962) --- pandas/core/indexes/base.py | 42 ----------------------------- pandas/core/indexes/category.py | 10 ------- pandas/core/indexes/datetimelike.py | 27 ------------------- pandas/core/indexes/interval.py | 6 ----- pandas/core/indexes/numeric.py | 14 ---------- pandas/core/indexing.py | 34 ++++++++--------------- pandas/core/series.py | 6 +---- 7 files changed, 12 insertions(+), 127 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index af6cbd0e95ec1..0b9a29ec0aae4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3078,48 +3078,6 @@ def _get_partial_string_timestamp_match_key(self, key): # GH#10331 return key - def _convert_scalar_indexer(self, key, kind: str_t): - """ - Convert a scalar indexer. - - Parameters - ---------- - key : label of the slice bound - kind : {'loc', 'getitem'} - """ - assert kind in ["loc", "getitem"] - - if len(self) and not isinstance(self, ABCMultiIndex): - - # we can raise here if we are definitive that this - # is positional indexing (eg. 
.loc on with a float) - # or label indexing if we are using a type able - # to be represented in the index - - if kind == "getitem" and is_float(key): - if not self.is_floating(): - raise KeyError(key) - - elif kind == "loc" and is_float(key): - - # we want to raise KeyError on string/mixed here - # technically we *could* raise a TypeError - # on anything but mixed though - if self.inferred_type not in [ - "floating", - "mixed-integer-float", - "integer-na", - "string", - "mixed", - ]: - raise KeyError(key) - - elif kind == "loc" and is_integer(key): - if not (is_integer_dtype(self.dtype) or is_object_dtype(self.dtype)): - raise KeyError(key) - - return key - def _validate_positional_slice(self, key: slice): """ For positional indexing, a slice must have either int or None diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 8c2d7f4aa6c0e..d43ae8eb54818 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -574,16 +574,6 @@ def get_indexer_non_unique(self, target): indexer, missing = self._engine.get_indexer_non_unique(codes) return ensure_platform_int(indexer), missing - @Appender(Index._convert_scalar_indexer.__doc__) - def _convert_scalar_indexer(self, key, kind: str): - assert kind in ["loc", "getitem"] - if kind == "loc": - try: - return self.categories._convert_scalar_indexer(key, kind="loc") - except TypeError: - raise KeyError(key) - return super()._convert_scalar_indexer(key, kind=kind) - @Appender(Index._convert_list_indexer.__doc__) def _convert_list_indexer(self, keyarr): # Return our indexer or raise if all of the values are not included in diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index d1e21a2fe7657..894e1d95a17bc 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -18,7 +18,6 @@ is_bool_dtype, is_categorical_dtype, is_dtype_equal, - is_float, is_integer, is_list_like, is_period_dtype, @@ -377,32 +376,6 @@ def _format_attrs(self): # -------------------------------------------------------------------- # Indexing Methods - def _convert_scalar_indexer(self, key, kind: str): - """ - We don't allow integer or float indexing on datetime-like when using - loc. 
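# ---------------------------------------------------------------------------
# NOTE (editor's annotation, not part of this patch): with this pre-check
# removed, a positional-looking label on a datetime-like index is still
# rejected, just later in the lookup path.  A sketch, assuming pandas of
# this vintage:
#
#     import pandas as pd
#
#     ser = pd.Series(range(3), index=pd.date_range("2020-01-01", periods=3))
#     try:
#         ser.loc[0.0]          # float label on a DatetimeIndex
#     except KeyError as err:
#         print("KeyError:", err)
# ---------------------------------------------------------------------------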
- - Parameters - ---------- - key : label of the slice bound - kind : {'loc', 'getitem'} - """ - assert kind in ["loc", "getitem"] - - if not is_scalar(key): - raise TypeError(key) - - # we don't allow integer/float indexing for loc - # we don't allow float indexing for getitem - is_int = is_integer(key) - is_flt = is_float(key) - if kind == "loc" and (is_int or is_flt): - raise KeyError(key) - elif kind == "getitem" and is_flt: - raise KeyError(key) - - return super()._convert_scalar_indexer(key, kind=kind) - def _validate_partial_date_slice(self, reso: str): raise NotImplementedError diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index d396d1c76f357..6968837fb13e6 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -514,12 +514,6 @@ def _should_fallback_to_positional(self): # positional in this case return self.dtype.subtype.kind in ["m", "M"] - @Appender(Index._convert_scalar_indexer.__doc__) - def _convert_scalar_indexer(self, key, kind: str): - assert kind in ["getitem", "loc"] - # never iloc, so no-op - return key - def _maybe_cast_slice_bound(self, label, side, kind): return getattr(self, side)._maybe_cast_slice_bound(label, side, kind) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 06a26cc90555e..cb6f68ae0376d 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -254,14 +254,6 @@ def asi8(self) -> np.ndarray: # do not cache or you'll create a memory leak return self.values.view(self._default_dtype) - @Appender(Index._convert_scalar_indexer.__doc__) - def _convert_scalar_indexer(self, key, kind: str): - assert kind in ["loc", "getitem"] - - # never iloc, which we don't coerce to integers - key = self._maybe_cast_indexer(key) - return super()._convert_scalar_indexer(key, kind=kind) - class Int64Index(IntegerIndex): __doc__ = _num_index_shared_docs["class_descr"] % _int64_descr_args @@ -391,12 +383,6 @@ def astype(self, dtype, copy=True): def _should_fallback_to_positional(self): return False - @Appender(Index._convert_scalar_indexer.__doc__) - def _convert_scalar_indexer(self, key, kind: str): - assert kind in ["loc", "getitem"] - # no-op for non-iloc - return key - @Appender(Index._convert_slice_indexer.__doc__) def _convert_slice_indexer(self, key: slice, kind: str): assert kind in ["loc", "getitem"] diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 35e61ab6a59c9..9a671c7fc170a 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -866,16 +866,7 @@ def _validate_key(self, key, axis: int): # slice of labels (where start-end in labels) # slice of integers (only if in the labels) # boolean - - if isinstance(key, slice): - return - - if com.is_bool_indexer(key): - return - - if not is_list_like_indexer(key): - labels = self.obj._get_axis(axis) - labels._convert_scalar_indexer(key, kind="loc") + pass def _has_valid_setitem_indexer(self, indexer) -> bool: return True @@ -1139,15 +1130,6 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): if isinstance(key, slice): return labels._convert_slice_indexer(key, kind="loc") - if is_scalar(key): - # try to find out correct indexer, if not type correct raise - try: - key = labels._convert_scalar_indexer(key, kind="loc") - except KeyError: - # but we will allow setting - if not is_setter: - raise - # see if we are positional in nature is_int_index = labels.is_integer() is_int_positional = is_integer(key) and not is_int_index @@ -2029,11 +2011,17 @@ def 
_convert_key(self, key, is_setter: bool = False): if is_setter: return list(key) - lkey = list(key) - for n, (ax, i) in enumerate(zip(self.obj.axes, key)): - lkey[n] = ax._convert_scalar_indexer(i, kind="loc") + return key - return tuple(lkey) + def __getitem__(self, key): + if self.ndim != 1 or not is_scalar(key): + # FIXME: is_scalar check is a kludge + return super().__getitem__(key) + + # Like Index.get_value, but we do not allow positional fallback + obj = self.obj + loc = obj.index.get_loc(key) + return obj.index._get_values_for_loc(obj, loc, key) @Appender(IndexingMixin.iat.__doc__) diff --git a/pandas/core/series.py b/pandas/core/series.py index 19b247466e14d..db63e9205d48d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -852,9 +852,7 @@ def __getitem__(self, key): return self key_is_scalar = is_scalar(key) - if key_is_scalar: - key = self.index._convert_scalar_indexer(key, kind="getitem") - elif isinstance(key, (list, tuple)): + if isinstance(key, (list, tuple)): key = unpack_1tuple(key) if key_is_scalar or isinstance(self.index, MultiIndex): @@ -974,8 +972,6 @@ def _get_value(self, label, takeable: bool = False): # Similar to Index.get_value, but we do not fall back to positional loc = self.index.get_loc(label) - # We assume that _convert_scalar_indexer has already been called, - # with kind="loc", if necessary, by the time we get here return self.index._get_values_for_loc(self, loc, label) def __setitem__(self, key, value): From ce7670ccb8ecaa5100f17e0506caf8b416dd9ffe Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Mar 2020 07:29:47 -0800 Subject: [PATCH 234/388] Implement BlockManager.iset (#32350) --- pandas/core/internals/managers.py | 34 ++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 64896e2cac311..c0e32066a8a70 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -8,7 +8,7 @@ import numpy as np from pandas._libs import Timedelta, Timestamp, internals as libinternals, lib -from pandas._typing import DtypeObj +from pandas._typing import DtypeObj, Label from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -1001,7 +1001,25 @@ def delete(self, item): ) self._rebuild_blknos_and_blklocs() - def set(self, item, value): + def set(self, item: Label, value): + """ + Set new item in-place. + + Notes + ----- + Does not consolidate. + Adds new Block if not contained in the current items Index. + """ + try: + loc = self.items.get_loc(item) + except KeyError: + # This item wasn't present, just insert at end + self.insert(len(self.items), item, value) + return + + self.iset(loc, value) + + def iset(self, loc: Union[int, slice, np.ndarray], value): """ Set new item in-place. Does not consolidate. Adds new Block if not contained in the current set of items @@ -1034,13 +1052,6 @@ def value_getitem(placement): "Shape of new values must be compatible with manager shape" ) - try: - loc = self.items.get_loc(item) - except KeyError: - # This item wasn't present, just insert at end - self.insert(len(self.items), item, value) - return - if isinstance(loc, int): loc = [loc] @@ -1086,7 +1097,7 @@ def value_getitem(placement): unfit_mgr_locs = np.concatenate(unfit_mgr_locs) unfit_count = len(unfit_mgr_locs) - new_blocks = [] + new_blocks: List[Block] = [] if value_is_extension_type: # This code (ab-)uses the fact that sparse blocks contain only # one item. 
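# ---------------------------------------------------------------------------
# NOTE (editor's annotation, not part of this patch): the split above keeps
# ``set`` label-based while ``iset`` becomes the positional workhorse.  A
# simplified sketch with hypothetical names, not the real pandas internals:
#
#     class Manager:
#         def __init__(self, items):
#             self.items = list(items)
#             self._arrays = [None] * len(items)
#
#         def set(self, item, value):
#             try:
#                 loc = self.items.index(item)   # label -> position
#             except ValueError:
#                 self.items.append(item)        # unknown label: append
#                 self._arrays.append(value)
#                 return
#             self.iset(loc, value)
#
#         def iset(self, loc, value):
#             self._arrays[loc] = value          # purely positional
# ---------------------------------------------------------------------------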
@@ -1145,6 +1156,9 @@ def insert(self, loc: int, item, value, allow_duplicates: bool = False): # insert to the axis; this could possibly raise a TypeError new_axis = self.items.insert(loc, item) + if value.ndim == self.ndim - 1 and not is_extension_array_dtype(value): + value = _safe_reshape(value, (1,) + value.shape) + block = make_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1)) for blkno, count in _fast_count_smallints(self._blknos[loc:]): From d33b0025db0b2d79abe51343054d4c8bbed104c6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Mar 2020 08:11:54 -0800 Subject: [PATCH 235/388] CLN: remove unreachable branch (#32405) --- pandas/core/indexes/base.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0b9a29ec0aae4..173a83103b8ad 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -32,7 +32,6 @@ is_categorical, is_categorical_dtype, is_datetime64_any_dtype, - is_datetime64tz_dtype, is_dtype_equal, is_extension_array_dtype, is_float, @@ -2525,14 +2524,8 @@ def _union(self, other, sort): return other._get_reconciled_name_object(self) # TODO(EA): setops-refactor, clean all this up - if is_datetime64tz_dtype(self): - lvals = self._ndarray_values - else: - lvals = self._values - if is_datetime64tz_dtype(other): - rvals = other._ndarray_values - else: - rvals = other._values + lvals = self._values + rvals = other._values if sort is None and self.is_monotonic and other.is_monotonic: try: From dcd86e534cc6eb68fbd07f3bd3cd633dc1339023 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Mar 2020 17:02:47 -0800 Subject: [PATCH 236/388] REF: avoid using internals methods for to_timestamp, to_period (#32347) --- pandas/core/frame.py | 48 ++++++++++++++++---------------------------- 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 106128004f549..5a4b4f74b82a5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -94,10 +94,8 @@ ) from pandas.core.dtypes.generic import ( ABCDataFrame, - ABCDatetimeIndex, ABCIndexClass, ABCMultiIndex, - ABCPeriodIndex, ABCSeries, ) from pandas.core.dtypes.missing import isna, notna @@ -8246,7 +8244,9 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation="linear"): return result - def to_timestamp(self, freq=None, how="start", axis=0, copy=True) -> "DataFrame": + def to_timestamp( + self, freq=None, how: str = "start", axis: Axis = 0, copy: bool = True + ) -> "DataFrame": """ Cast to DatetimeIndex of timestamps, at *beginning* of period. @@ -8266,23 +8266,16 @@ def to_timestamp(self, freq=None, how="start", axis=0, copy=True) -> "DataFrame" ------- DataFrame with DatetimeIndex """ - new_data = self._data - if copy: - new_data = new_data.copy() + new_obj = self.copy(deep=copy) - axis = self._get_axis_number(axis) - if axis == 0: - assert isinstance(self.index, (ABCDatetimeIndex, ABCPeriodIndex)) - new_data.set_axis(1, self.index.to_timestamp(freq=freq, how=how)) - elif axis == 1: - assert isinstance(self.columns, (ABCDatetimeIndex, ABCPeriodIndex)) - new_data.set_axis(0, self.columns.to_timestamp(freq=freq, how=how)) - else: # pragma: no cover - raise AssertionError(f"Axis must be 0 or 1. 
Got {axis}") + axis_name = self._get_axis_name(axis) + old_ax = getattr(self, axis_name) + new_ax = old_ax.to_timestamp(freq=freq, how=how) - return self._constructor(new_data) + setattr(new_obj, axis_name, new_ax) + return new_obj - def to_period(self, freq=None, axis=0, copy=True) -> "DataFrame": + def to_period(self, freq=None, axis: Axis = 0, copy: bool = True) -> "DataFrame": """ Convert DataFrame from DatetimeIndex to PeriodIndex. @@ -8300,23 +8293,16 @@ def to_period(self, freq=None, axis=0, copy=True) -> "DataFrame": Returns ------- - TimeSeries with PeriodIndex + DataFrame with PeriodIndex """ - new_data = self._data - if copy: - new_data = new_data.copy() + new_obj = self.copy(deep=copy) - axis = self._get_axis_number(axis) - if axis == 0: - assert isinstance(self.index, ABCDatetimeIndex) - new_data.set_axis(1, self.index.to_period(freq=freq)) - elif axis == 1: - assert isinstance(self.columns, ABCDatetimeIndex) - new_data.set_axis(0, self.columns.to_period(freq=freq)) - else: # pragma: no cover - raise AssertionError(f"Axis must be 0 or 1. Got {axis}") + axis_name = self._get_axis_name(axis) + old_ax = getattr(self, axis_name) + new_ax = old_ax.to_period(freq=freq) - return self._constructor(new_data) + setattr(new_obj, axis_name, new_ax) + return new_obj def isin(self, values) -> "DataFrame": """ From c5f0ebf8d92322445a4413c1bf9bd08e226583f0 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 4 Mar 2020 08:19:31 +0000 Subject: [PATCH 237/388] TYP/cln: generic._make_*_function (#32363) --- pandas/core/generic.py | 311 ++++++++++++++++++++++------------------- 1 file changed, 169 insertions(+), 142 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f7eb79a4f1c78..e36eace1e42e6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9899,37 +9899,37 @@ def _add_numeric_operations(cls): """ Add the operations to the cls; evaluate the doc strings again """ - axis_descr, name, name2 = _doc_parms(cls) + axis_descr, name1, name2 = _doc_parms(cls) cls.any = _make_logical_function( cls, "any", - name, - name2, - axis_descr, - _any_desc, - nanops.nanany, - _any_see_also, - _any_examples, + name1=name1, + name2=name2, + axis_descr=axis_descr, + desc=_any_desc, + func=nanops.nanany, + see_also=_any_see_also, + examples=_any_examples, empty_value=False, ) cls.all = _make_logical_function( cls, "all", - name, - name2, - axis_descr, - _all_desc, - nanops.nanall, - _all_see_also, - _all_examples, + name1=name1, + name2=name2, + axis_descr=axis_descr, + desc=_all_desc, + func=nanops.nanall, + see_also=_all_see_also, + examples=_all_examples, empty_value=True, ) @Substitution( desc="Return the mean absolute deviation of the values " "for the requested axis.", - name1=name, + name1=name1, name2=name2, axis_descr=axis_descr, min_count="", @@ -9957,177 +9957,177 @@ def mad(self, axis=None, skipna=None, level=None): cls.sem = _make_stat_function_ddof( cls, "sem", - name, - name2, - axis_descr, - "Return unbiased standard error of the mean over requested " + name1=name1, + name2=name2, + axis_descr=axis_descr, + desc="Return unbiased standard error of the mean over requested " "axis.\n\nNormalized by N-1 by default. 
This can be changed " "using the ddof argument", - nanops.nansem, + func=nanops.nansem, ) cls.var = _make_stat_function_ddof( cls, "var", - name, - name2, - axis_descr, - "Return unbiased variance over requested axis.\n\nNormalized by " + name1=name1, + name2=name2, + axis_descr=axis_descr, + desc="Return unbiased variance over requested axis.\n\nNormalized by " "N-1 by default. This can be changed using the ddof argument", - nanops.nanvar, + func=nanops.nanvar, ) cls.std = _make_stat_function_ddof( cls, "std", - name, - name2, - axis_descr, - "Return sample standard deviation over requested axis." + name1=name1, + name2=name2, + axis_descr=axis_descr, + desc="Return sample standard deviation over requested axis." "\n\nNormalized by N-1 by default. This can be changed using the " "ddof argument", - nanops.nanstd, + func=nanops.nanstd, ) cls.cummin = _make_cum_function( cls, "cummin", - name, - name2, - axis_descr, - "minimum", - np.minimum.accumulate, - "min", - np.inf, - np.nan, - _cummin_examples, + name1=name1, + name2=name2, + axis_descr=axis_descr, + desc="minimum", + accum_func=np.minimum.accumulate, + accum_func_name="min", + mask_a=np.inf, + mask_b=np.nan, + examples=_cummin_examples, ) cls.cumsum = _make_cum_function( cls, "cumsum", - name, - name2, - axis_descr, - "sum", - np.cumsum, - "sum", - 0.0, - np.nan, - _cumsum_examples, + name1=name1, + name2=name2, + axis_descr=axis_descr, + desc="sum", + accum_func=np.cumsum, + accum_func_name="sum", + mask_a=0.0, + mask_b=np.nan, + examples=_cumsum_examples, ) cls.cumprod = _make_cum_function( cls, "cumprod", - name, - name2, - axis_descr, - "product", - np.cumprod, - "prod", - 1.0, - np.nan, - _cumprod_examples, + name1=name1, + name2=name2, + axis_descr=axis_descr, + desc="product", + accum_func=np.cumprod, + accum_func_name="prod", + mask_a=1.0, + mask_b=np.nan, + examples=_cumprod_examples, ) cls.cummax = _make_cum_function( cls, "cummax", - name, - name2, - axis_descr, - "maximum", - np.maximum.accumulate, - "max", - -np.inf, - np.nan, - _cummax_examples, + name1=name1, + name2=name2, + axis_descr=axis_descr, + desc="maximum", + accum_func=np.maximum.accumulate, + accum_func_name="max", + mask_a=-np.inf, + mask_b=np.nan, + examples=_cummax_examples, ) cls.sum = _make_min_count_stat_function( cls, "sum", - name, - name2, - axis_descr, - """Return the sum of the values for the requested axis.\n - This is equivalent to the method ``numpy.sum``.""", - nanops.nansum, - _stat_func_see_also, - _sum_examples, + name1=name1, + name2=name2, + axis_descr=axis_descr, + desc="Return the sum of the values for the requested axis.\n\n" + "This is equivalent to the method ``numpy.sum``.", + func=nanops.nansum, + see_also=_stat_func_see_also, + examples=_sum_examples, ) cls.mean = _make_stat_function( cls, "mean", - name, - name2, - axis_descr, - "Return the mean of the values for the requested axis.", - nanops.nanmean, + name1=name1, + name2=name2, + axis_descr=axis_descr, + desc="Return the mean of the values for the requested axis.", + func=nanops.nanmean, ) cls.skew = _make_stat_function( cls, "skew", - name, - name2, - axis_descr, - "Return unbiased skew over requested axis.\n\nNormalized by N-1.", - nanops.nanskew, + name1=name1, + name2=name2, + axis_descr=axis_descr, + desc="Return unbiased skew over requested axis.\n\nNormalized by N-1.", + func=nanops.nanskew, ) cls.kurt = _make_stat_function( cls, "kurt", - name, - name2, - axis_descr, - "Return unbiased kurtosis over requested axis.\n\n" + name1=name1, + name2=name2, + 
axis_descr=axis_descr, + desc="Return unbiased kurtosis over requested axis.\n\n" "Kurtosis obtained using Fisher's definition of\n" "kurtosis (kurtosis of normal == 0.0). Normalized " "by N-1.", - nanops.nankurt, + func=nanops.nankurt, ) cls.kurtosis = cls.kurt cls.prod = _make_min_count_stat_function( cls, "prod", - name, - name2, - axis_descr, - "Return the product of the values for the requested axis.", - nanops.nanprod, + name1=name1, + name2=name2, + axis_descr=axis_descr, + desc="Return the product of the values for the requested axis.", + func=nanops.nanprod, examples=_prod_examples, ) cls.product = cls.prod cls.median = _make_stat_function( cls, "median", - name, - name2, - axis_descr, - "Return the median of the values for the requested axis.", - nanops.nanmedian, + name1=name1, + name2=name2, + axis_descr=axis_descr, + desc="Return the median of the values for the requested axis.", + func=nanops.nanmedian, ) cls.max = _make_stat_function( cls, "max", - name, - name2, - axis_descr, - """Return the maximum of the values for the requested axis.\n - If you want the *index* of the maximum, use ``idxmax``. This is - the equivalent of the ``numpy.ndarray`` method ``argmax``.""", - nanops.nanmax, - _stat_func_see_also, - _max_examples, + name1=name1, + name2=name2, + axis_descr=axis_descr, + desc="Return the maximum of the values for the requested axis.\n\n" + "If you want the *index* of the maximum, use ``idxmax``. This is" + "the equivalent of the ``numpy.ndarray`` method ``argmax``.", + func=nanops.nanmax, + see_also=_stat_func_see_also, + examples=_max_examples, ) cls.min = _make_stat_function( cls, "min", - name, - name2, - axis_descr, - """Return the minimum of the values for the requested axis.\n - If you want the *index* of the minimum, use ``idxmin``. This is - the equivalent of the ``numpy.ndarray`` method ``argmin``.""", - nanops.nanmin, - _stat_func_see_also, - _min_examples, + name1=name1, + name2=name2, + axis_descr=axis_descr, + desc="Return the minimum of the values for the requested axis.\n\n" + "If you want the *index* of the minimum, use ``idxmin``. 
This is" + "the equivalent of the ``numpy.ndarray`` method ``argmin``.", + func=nanops.nanmin, + see_also=_stat_func_see_also, + examples=_min_examples, ) @classmethod @@ -10947,8 +10947,16 @@ def _doc_parms(cls): def _make_min_count_stat_function( - cls, name, name1, name2, axis_descr, desc, f, see_also: str = "", examples: str = "" -): + cls, + name: str, + name1: str, + name2: str, + axis_descr: str, + desc: str, + func: Callable, + see_also: str = "", + examples: str = "", +) -> Callable: @Substitution( desc=desc, name1=name1, @@ -10983,8 +10991,8 @@ def stat_func( name, axis=axis, level=level, skipna=skipna, min_count=min_count ) return self._reduce( - f, - name, + func, + name=name, axis=axis, skipna=skipna, numeric_only=numeric_only, @@ -10995,8 +11003,16 @@ def stat_func( def _make_stat_function( - cls, name, name1, name2, axis_descr, desc, f, see_also: str = "", examples: str = "" -): + cls, + name: str, + name1: str, + name2: str, + axis_descr: str, + desc: str, + func: Callable, + see_also: str = "", + examples: str = "", +) -> Callable: @Substitution( desc=desc, name1=name1, @@ -11021,13 +11037,15 @@ def stat_func( if level is not None: return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) return self._reduce( - f, name, axis=axis, skipna=skipna, numeric_only=numeric_only + func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only ) return set_function_name(stat_func, name, cls) -def _make_stat_function_ddof(cls, name, name1, name2, axis_descr, desc, f): +def _make_stat_function_ddof( + cls, name: str, name1: str, name2: str, axis_descr: str, desc: str, func: Callable +) -> Callable: @Substitution(desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) @Appender(_num_ddof_doc) def stat_func( @@ -11043,7 +11061,7 @@ def stat_func( name, axis=axis, level=level, skipna=skipna, ddof=ddof ) return self._reduce( - f, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof + func, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof ) return set_function_name(stat_func, name, cls) @@ -11051,17 +11069,17 @@ def stat_func( def _make_cum_function( cls, - name, - name1, - name2, - axis_descr, - desc, - accum_func, - accum_func_name, - mask_a, - mask_b, - examples, -): + name: str, + name1: str, + name2: str, + axis_descr: str, + desc: str, + accum_func: Callable, + accum_func_name: str, + mask_a: float, + mask_b: float, + examples: str, +) -> Callable: @Substitution( desc=desc, name1=name1, @@ -11145,8 +11163,17 @@ def na_accum_func(blk_values): def _make_logical_function( - cls, name, name1, name2, axis_descr, desc, f, see_also, examples, empty_value -): + cls, + name: str, + name1: str, + name2: str, + axis_descr: str, + desc: str, + func: Callable, + see_also: str, + examples: str, + empty_value: bool, +) -> Callable: @Substitution( desc=desc, name1=name1, @@ -11166,8 +11193,8 @@ def logical_func(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs ) return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) return self._reduce( - f, - name, + func, + name=name, axis=axis, skipna=skipna, numeric_only=bool_only, From 0d04683baaf65f6023fa83ab0d985726fb3c98b0 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Wed, 4 Mar 2020 14:52:30 +0100 Subject: [PATCH 238/388] TST: Split and simplify test_value_counts_unique_nunique (#32281) --- pandas/tests/base/test_ops.py | 282 +++++++++++++++------------------- 1 file changed, 126 insertions(+), 156 deletions(-) diff --git a/pandas/tests/base/test_ops.py 
b/pandas/tests/base/test_ops.py index 39dca1a9742df..8f48d0a3e8378 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -1,3 +1,4 @@ +import collections from datetime import datetime, timedelta from io import StringIO import sys @@ -15,7 +16,6 @@ is_datetime64_dtype, is_datetime64tz_dtype, is_object_dtype, - is_period_dtype, needs_i8_conversion, ) @@ -26,11 +26,9 @@ Index, Interval, IntervalIndex, - PeriodIndex, Series, Timedelta, TimedeltaIndex, - Timestamp, ) import pandas._testing as tm @@ -207,180 +205,152 @@ def test_ndarray_compat_properties(self, index_or_series_obj): assert Index([1]).item() == 1 assert Series([1]).item() == 1 - def test_value_counts_unique_nunique(self, index_or_series_obj): - orig = index_or_series_obj - obj = orig.copy() - klass = type(obj) - values = obj._values - - if orig.duplicated().any(): - pytest.xfail( - "The test implementation isn't flexible enough to deal " - "with duplicated values. This isn't a bug in the " - "application code, but in the test code." - ) + def test_unique(self, index_or_series_obj): + obj = index_or_series_obj + obj = np.repeat(obj, range(1, len(obj) + 1)) + result = obj.unique() - # create repeated values, 'n'th element is repeated by n+1 times - if isinstance(obj, Index): - expected_index = Index(obj[::-1]) - expected_index.name = None - obj = obj.repeat(range(1, len(obj) + 1)) + # dict.fromkeys preserves the order + unique_values = list(dict.fromkeys(obj.values)) + if isinstance(obj, pd.MultiIndex): + expected = pd.MultiIndex.from_tuples(unique_values) + expected.names = obj.names + tm.assert_index_equal(result, expected) + elif isinstance(obj, pd.Index): + expected = pd.Index(unique_values, dtype=obj.dtype) + if is_datetime64tz_dtype(obj): + expected = expected.normalize() + tm.assert_index_equal(result, expected) else: - expected_index = Index(values[::-1]) - idx = obj.index.repeat(range(1, len(obj) + 1)) - # take-based repeat - indices = np.repeat(np.arange(len(obj)), range(1, len(obj) + 1)) - rep = values.take(indices) - obj = klass(rep, index=idx) - - # check values has the same dtype as the original - assert obj.dtype == orig.dtype - - expected_s = Series( - range(len(orig), 0, -1), index=expected_index, dtype="int64" - ) + expected = np.array(unique_values) + tm.assert_numpy_array_equal(result, expected) - result = obj.value_counts() - tm.assert_series_equal(result, expected_s) - assert result.index.name is None + @pytest.mark.parametrize("null_obj", [np.nan, None]) + def test_unique_null(self, null_obj, index_or_series_obj): + obj = index_or_series_obj + + if not allow_na_ops(obj): + pytest.skip("type doesn't allow for NA operations") + elif len(obj) < 1: + pytest.skip("Test doesn't make sense on empty data") + elif isinstance(obj, pd.MultiIndex): + pytest.skip(f"MultiIndex can't hold '{null_obj}'") + + values = obj.values + if needs_i8_conversion(obj): + values[0:2] = iNaT + else: + values[0:2] = null_obj + klass = type(obj) + repeated_values = np.repeat(values, range(1, len(values) + 1)) + obj = klass(repeated_values, dtype=obj.dtype) result = obj.unique() - if isinstance(obj, Index): - assert isinstance(result, type(obj)) - tm.assert_index_equal(result, orig) - assert result.dtype == orig.dtype - elif is_datetime64tz_dtype(obj): - # datetimetz Series returns array of Timestamp - assert result[0] == orig[0] - for r in result: - assert isinstance(r, Timestamp) - - tm.assert_numpy_array_equal( - result.astype(object), orig._values.astype(object) - ) + + unique_values_raw = 
dict.fromkeys(obj.values) + # because np.nan == np.nan is False, but None == None is True + # np.nan would be duplicated, whereas None wouldn't + unique_values_not_null = [ + val for val in unique_values_raw if not pd.isnull(val) + ] + unique_values = [null_obj] + unique_values_not_null + + if isinstance(obj, pd.Index): + expected = pd.Index(unique_values, dtype=obj.dtype) + if is_datetime64tz_dtype(obj): + result = result.normalize() + expected = expected.normalize() + elif isinstance(obj, pd.CategoricalIndex): + expected = expected.set_categories(unique_values_not_null) + tm.assert_index_equal(result, expected) else: - tm.assert_numpy_array_equal(result, orig.values) - assert result.dtype == orig.dtype + expected = np.array(unique_values, dtype=obj.dtype) + tm.assert_numpy_array_equal(result, expected) - # dropna=True would break for MultiIndex - assert obj.nunique(dropna=False) == len(np.unique(obj.values)) + def test_nunique(self, index_or_series_obj): + obj = index_or_series_obj + obj = np.repeat(obj, range(1, len(obj) + 1)) + expected = len(obj.unique()) + assert obj.nunique(dropna=False) == expected @pytest.mark.parametrize("null_obj", [np.nan, None]) - def test_value_counts_unique_nunique_null(self, null_obj, index_or_series_obj): - orig = index_or_series_obj - obj = orig.copy() - klass = type(obj) - values = obj._ndarray_values - num_values = len(orig) + def test_nunique_null(self, null_obj, index_or_series_obj): + obj = index_or_series_obj if not allow_na_ops(obj): pytest.skip("type doesn't allow for NA operations") - elif isinstance(orig, (pd.CategoricalIndex, pd.IntervalIndex)): - pytest.skip(f"values of {klass} cannot be changed") - elif isinstance(orig, pd.MultiIndex): - pytest.skip("MultiIndex doesn't support isna") - elif orig.duplicated().any(): - pytest.xfail( - "The test implementation isn't flexible enough to deal " - "with duplicated values. This isn't a bug in the " - "application code, but in the test code." 
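# ---------------------------------------------------------------------------
# NOTE (editor's annotation, not part of this patch): the rewritten tests
# derive their expectations from ``collections.Counter`` instead of the
# hand-built repeat logic being deleted here.  The equivalence relied on:
#
#     import collections
#     import pandas as pd
#
#     obj = pd.Series(["a", "b", "b", "c", "c", "c"])
#     counter = collections.Counter(obj)
#     expected = pd.Series(dict(counter.most_common()), dtype="int64")
#     print(obj.value_counts().sort_index().equals(expected.sort_index()))
#     # True
# ---------------------------------------------------------------------------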
- ) - - # special assign to the numpy array - if is_datetime64tz_dtype(obj): - if isinstance(obj, DatetimeIndex): - v = obj.asi8 - v[0:2] = iNaT - values = obj._shallow_copy(v) - else: - obj = obj.copy() - obj[0:2] = pd.NaT - values = obj._values + elif isinstance(obj, pd.MultiIndex): + pytest.skip(f"MultiIndex can't hold '{null_obj}'") - elif is_period_dtype(obj): - values[0:2] = iNaT - parr = type(obj._data)(values, dtype=obj.dtype) - values = obj._shallow_copy(parr) - elif needs_i8_conversion(obj): + values = obj.values + if needs_i8_conversion(obj): values[0:2] = iNaT - values = obj._shallow_copy(values) else: values[0:2] = null_obj - # check values has the same dtype as the original - assert values.dtype == obj.dtype - - # create repeated values, 'n'th element is repeated by n+1 - # times - if isinstance(obj, (DatetimeIndex, PeriodIndex)): - expected_index = obj.copy() - expected_index.name = None + klass = type(obj) + repeated_values = np.repeat(values, range(1, len(values) + 1)) + obj = klass(repeated_values, dtype=obj.dtype) - # attach name to klass - obj = klass(values.repeat(range(1, len(obj) + 1))) - obj.name = "a" - else: - if isinstance(obj, DatetimeIndex): - expected_index = orig._values._shallow_copy(values) - else: - expected_index = Index(values) - expected_index.name = None - obj = obj.repeat(range(1, len(obj) + 1)) - obj.name = "a" - - # check values has the same dtype as the original - assert obj.dtype == orig.dtype - - # check values correctly have NaN - nanloc = np.zeros(len(obj), dtype=np.bool) - nanloc[:3] = True - if isinstance(obj, Index): - tm.assert_numpy_array_equal(pd.isna(obj), nanloc) + if isinstance(obj, pd.CategoricalIndex): + assert obj.nunique() == len(obj.categories) + assert obj.nunique(dropna=False) == len(obj.categories) + 1 else: - exp = Series(nanloc, obj.index, name="a") - tm.assert_series_equal(pd.isna(obj), exp) - - expected_data = list(range(num_values, 2, -1)) - expected_data_na = expected_data.copy() - if expected_data_na: - expected_data_na.append(3) - expected_s_na = Series( - expected_data_na, - index=expected_index[num_values - 1 : 0 : -1], - dtype="int64", - name="a", - ) - expected_s = Series( - expected_data, - index=expected_index[num_values - 1 : 1 : -1], - dtype="int64", - name="a", - ) + num_unique_values = len(obj.unique()) + assert obj.nunique() == max(0, num_unique_values - 1) + assert obj.nunique(dropna=False) == max(0, num_unique_values) - result_s_na = obj.value_counts(dropna=False) - tm.assert_series_equal(result_s_na, expected_s_na) - assert result_s_na.index.name is None - assert result_s_na.name == "a" - result_s = obj.value_counts() - tm.assert_series_equal(obj.value_counts(), expected_s) - assert result_s.index.name is None - assert result_s.name == "a" + def test_value_counts(self, index_or_series_obj): + obj = index_or_series_obj + obj = np.repeat(obj, range(1, len(obj) + 1)) + result = obj.value_counts() - result = obj.unique() - if isinstance(obj, Index): - tm.assert_index_equal(result, Index(values[1:], name="a")) - elif is_datetime64tz_dtype(obj): - # unable to compare NaT / nan - tm.assert_extension_array_equal(result[1:], values[2:]) - assert result[0] is pd.NaT - elif len(obj) > 0: - tm.assert_numpy_array_equal(result[1:], values[2:]) - - assert pd.isna(result[0]) - assert result.dtype == orig.dtype - - assert obj.nunique() == max(0, num_values - 2) - assert obj.nunique(dropna=False) == max(0, num_values - 1) + counter = collections.Counter(obj) + expected = pd.Series(dict(counter.most_common()), 
dtype=np.int64, name=obj.name) + expected.index = expected.index.astype(obj.dtype) + if isinstance(obj, pd.MultiIndex): + expected.index = pd.Index(expected.index) + + # sort_index to avoid switched order when values share the same count + result = result.sort_index() + expected = expected.sort_index() + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("null_obj", [np.nan, None]) + def test_value_counts_null(self, null_obj, index_or_series_obj): + orig = index_or_series_obj + obj = orig.copy() + + if not allow_na_ops(obj): + pytest.skip("type doesn't allow for NA operations") + elif len(obj) < 1: + pytest.skip("Test doesn't make sense on empty data") + elif isinstance(orig, pd.MultiIndex): + pytest.skip(f"MultiIndex can't hold '{null_obj}'") + + values = obj.values + if needs_i8_conversion(obj): + values[0:2] = iNaT + else: + values[0:2] = null_obj + + klass = type(obj) + repeated_values = np.repeat(values, range(1, len(values) + 1)) + obj = klass(repeated_values, dtype=obj.dtype) + + # because np.nan == np.nan is False, but None == None is True + # np.nan would be duplicated, whereas None wouldn't + counter = collections.Counter(obj.dropna()) + expected = pd.Series(dict(counter.most_common()), dtype=np.int64) + expected.index = expected.index.astype(obj.dtype) + + tm.assert_series_equal(obj.value_counts(), expected) + + # can't use expected[null_obj] = 3 as + # IntervalIndex doesn't allow assignment + new_entry = pd.Series({np.nan: 3}, dtype=np.int64) + expected = expected.append(new_entry) + tm.assert_series_equal(obj.value_counts(dropna=False), expected) def test_value_counts_inferred(self, index_or_series): klass = index_or_series From 67aae8028773eae231662e47ec2c46124ad4229e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 4 Mar 2020 06:07:56 -0800 Subject: [PATCH 239/388] CLN: avoid values_from_object in NDFrame (#32422) --- pandas/core/generic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e36eace1e42e6..f0147859cae97 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1327,7 +1327,7 @@ def equals(self, other): # Unary Methods def __neg__(self): - values = com.values_from_object(self) + values = self._values if is_bool_dtype(values): arr = operator.inv(values) elif ( @@ -1341,7 +1341,7 @@ def __neg__(self): return self.__array_wrap__(arr) def __pos__(self): - values = com.values_from_object(self) + values = self._values if is_bool_dtype(values) or is_period_arraylike(values): arr = values elif ( @@ -1796,7 +1796,7 @@ def empty(self) -> bool_t: __array_priority__ = 1000 def __array__(self, dtype=None) -> np.ndarray: - return com.values_from_object(self) + return np.asarray(self._values, dtype=dtype) def __array_wrap__(self, result, context=None): result = lib.item_from_zerodim(result) @@ -8521,7 +8521,7 @@ def _where( # try to not change dtype at first (if try_quick) if try_quick: - new_other = com.values_from_object(self) + new_other = np.asarray(self) new_other = new_other.copy() new_other[icond] = other other = new_other From 3242b4e32630c9e95eb6a40a9c14223828520124 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 4 Mar 2020 06:14:19 -0800 Subject: [PATCH 240/388] CLN: move away from .values, _ndarray_values (#32419) --- pandas/_testing.py | 16 ++++++++-------- pandas/core/dtypes/cast.py | 4 +++- pandas/core/frame.py | 2 +- pandas/core/indexes/multi.py | 4 ++-- pandas/core/indexes/numeric.py | 2 +- pandas/io/stata.py | 2 +- 
pandas/plotting/_matplotlib/converter.py | 6 +++--- 7 files changed, 19 insertions(+), 17 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index b0f18cb6fdd39..33ec4e4886aa6 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -706,11 +706,11 @@ def _get_ilevel_values(index, level): if isinstance(left, pd.PeriodIndex) or isinstance(right, pd.PeriodIndex): assert_attr_equal("freq", left, right, obj=obj) if isinstance(left, pd.IntervalIndex) or isinstance(right, pd.IntervalIndex): - assert_interval_array_equal(left.values, right.values) + assert_interval_array_equal(left._values, right._values) if check_categorical: if is_categorical_dtype(left) or is_categorical_dtype(right): - assert_categorical_equal(left.values, right.values, obj=f"{obj} category") + assert_categorical_equal(left._values, right._values, obj=f"{obj} category") def assert_class_equal(left, right, exact: Union[bool, str] = True, obj="Input"): @@ -883,7 +883,7 @@ def assert_interval_array_equal(left, right, exact="equiv", obj="IntervalArray") def assert_period_array_equal(left, right, obj="PeriodArray"): _check_isinstance(left, right, PeriodArray) - assert_numpy_array_equal(left._data, right._data, obj=f"{obj}.values") + assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data") assert_attr_equal("freq", left, right, obj=obj) @@ -1170,10 +1170,10 @@ def assert_series_equal( # datetimelike may have different objects (e.g. datetime.datetime # vs Timestamp) but will compare equal - if not Index(left.values).equals(Index(right.values)): + if not Index(left._values).equals(Index(right._values)): msg = ( - f"[datetimelike_compat=True] {left.values} " - f"is not equal to {right.values}." + f"[datetimelike_compat=True] {left._values} " + f"is not equal to {right._values}." 
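# ---------------------------------------------------------------------------
# NOTE (editor's annotation, not part of this patch): ``._values`` is
# preferred over ``.values`` throughout this change because it preserves
# extension types.  Illustration on a tz-aware Series (``_values`` is
# private API, shown only to motivate the change):
#
#     import pandas as pd
#
#     ser = pd.Series(pd.date_range("2020-01-01", periods=2, tz="UTC"))
#     print(type(ser.values))    # numpy.ndarray -- tz information dropped
#     print(type(ser._values))   # DatetimeArray -- tz information kept
# ---------------------------------------------------------------------------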
) raise AssertionError(msg) else: @@ -1212,8 +1212,8 @@ def assert_series_equal( if check_categorical: if is_categorical_dtype(left) or is_categorical_dtype(right): assert_categorical_equal( - left.values, - right.values, + left._values, + right._values, obj=f"{obj} category", check_category_order=check_category_order, ) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index c06bd8a1d6e36..7dac36b53fce5 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1181,9 +1181,11 @@ def try_timedelta(v): from pandas import to_timedelta try: - return to_timedelta(v)._ndarray_values.reshape(shape) + td_values = to_timedelta(v) except ValueError: return v.reshape(shape) + else: + return np.asarray(td_values).reshape(shape) inferred_type = lib.infer_datetimelike_array(ensure_object(v)) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5a4b4f74b82a5..61641bfb24293 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4580,7 +4580,7 @@ def drop_duplicates( duplicated = self.duplicated(subset, keep=keep) if inplace: - (inds,) = (-duplicated)._ndarray_values.nonzero() + (inds,) = np.asarray(-duplicated).nonzero() new_data = self._data.take(inds) if ignore_index: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f70975e19b9a4..c1efa512f326a 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2998,7 +2998,7 @@ def _update_indexer(idxr, indexer=indexer): indexer = _update_indexer(indexers, indexer=indexer) else: # no matches we are done - return Int64Index([])._ndarray_values + return np.array([], dtype=np.int64) elif com.is_null_slice(k): # empty slice @@ -3024,7 +3024,7 @@ def _update_indexer(idxr, indexer=indexer): # empty indexer if indexer is None: - return Int64Index([])._ndarray_values + return np.array([], dtype=np.int64) indexer = self._reorder_indexer(seq, indexer) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index cb6f68ae0376d..6c250ccd09a51 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -425,7 +425,7 @@ def equals(self, other) -> bool: other = self._constructor(other) if not is_dtype_equal(self.dtype, other.dtype) or self.shape != other.shape: return False - left, right = self._ndarray_values, other._ndarray_values + left, right = self._values, other._values return ((left == right) | (self._isnan & other._isnan)).all() except (TypeError, ValueError): return False diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 0397dfa923afb..6e79f5890f76d 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1672,7 +1672,7 @@ def _do_convert_missing(self, data: DataFrame, convert_missing: bool) -> DataFra continue if convert_missing: # Replacement follows Stata notation - missing_loc = np.nonzero(missing._ndarray_values)[0] + missing_loc = np.nonzero(np.asarray(missing))[0] umissing, umissing_loc = np.unique(series[missing], return_inverse=True) replacement = Series(series, dtype=np.object) for j, um in enumerate(umissing): diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index c399e5b9b7017..8260684c02ea6 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -218,13 +218,13 @@ def _convert_1d(values, units, axis): if isinstance(values, valid_types) or is_integer(values) or is_float(values): return get_datevalue(values, axis.freq) elif isinstance(values, PeriodIndex): - return 
values.asfreq(axis.freq)._ndarray_values + return values.asfreq(axis.freq).asi8 elif isinstance(values, Index): return values.map(lambda x: get_datevalue(x, axis.freq)) elif lib.infer_dtype(values, skipna=False) == "period": # https://github.com/pandas-dev/pandas/issues/24304 # convert ndarray[period] -> PeriodIndex - return PeriodIndex(values, freq=axis.freq)._ndarray_values + return PeriodIndex(values, freq=axis.freq).asi8 elif isinstance(values, (list, tuple, np.ndarray, Index)): return [get_datevalue(x, axis.freq) for x in values] return values @@ -607,7 +607,7 @@ def _daily_finder(vmin, vmax, freq): info = np.zeros( span, dtype=[("val", np.int64), ("maj", bool), ("min", bool), ("fmt", "|S20")] ) - info["val"][:] = dates_._ndarray_values + info["val"][:] = dates_.asi8 info["fmt"][:] = "" info["maj"][[0, -1]] = True # .. and set some shortcuts From fd8384e4955267dca875543a648c211ff375dc15 Mon Sep 17 00:00:00 2001 From: Harshavardhan Bachina Date: Wed, 4 Mar 2020 08:17:15 -0600 Subject: [PATCH 241/388] Series append raises TypeError (#32090) --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/core/series.py | 6 ++++++ pandas/tests/series/methods/test_append.py | 18 +++++++++--------- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 57c53f73962dc..7c68005144cbc 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -327,6 +327,7 @@ Reshaping - Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`) - :meth:`DataFrame.pivot` can now take lists for ``index`` and ``columns`` arguments (:issue:`21425`) - Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`) +- :meth:`Series.append` will now raise a ``TypeError`` when passed a DataFrame or a sequence containing Dataframe (:issue:`31413`) - :meth:`DataFrame.replace` and :meth:`Series.replace` will raise a ``TypeError`` if ``to_replace`` is not an expected type. Previously the ``replace`` would fail silently (:issue:`18634`) @@ -349,7 +350,6 @@ Other instead of ``TypeError: Can only append a Series if ignore_index=True or if the Series has a name`` (:issue:`30871`) - Set operations on an object-dtype :class:`Index` now always return object-dtype results (:issue:`31401`) - Bug in :meth:`AbstractHolidayCalendar.holidays` when no rules were defined (:issue:`31415`) -- .. 
---------------------------------------------------------------------------

diff --git a/pandas/core/series.py b/pandas/core/series.py
index db63e9205d48d..12164a4b8ff6b 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2534,6 +2534,12 @@ def append(self, to_append, ignore_index=False, verify_integrity=False):
             to_concat.extend(to_append)
         else:
             to_concat = [self, to_append]
+        if any(isinstance(x, (ABCDataFrame,)) for x in to_concat[1:]):
+            msg = (
+                f"to_append should be a Series or list/tuple of Series, "
+                f"got DataFrame"
+            )
+            raise TypeError(msg)
         return concat(
             to_concat, ignore_index=ignore_index, verify_integrity=verify_integrity
         )
diff --git a/pandas/tests/series/methods/test_append.py b/pandas/tests/series/methods/test_append.py
index 4742d6ae3544f..158c759fdaef3 100644
--- a/pandas/tests/series/methods/test_append.py
+++ b/pandas/tests/series/methods/test_append.py
@@ -61,15 +61,15 @@ def test_append_tuples(self):

         tm.assert_series_equal(expected, result)

-    def test_append_dataframe_regression(self):
-        # GH 30975
-        df = pd.DataFrame({"A": [1, 2]})
-        result = df.A.append([df])
-        expected = pd.DataFrame(
-            {0: [1.0, 2.0, None, None], "A": [None, None, 1.0, 2.0]}, index=[0, 1, 0, 1]
-        )
-
-        tm.assert_frame_equal(expected, result)
+    def test_append_dataframe_raises(self):
+        # GH 31413
+        df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
+
+        msg = "to_append should be a Series or list/tuple of Series, got DataFrame"
+        with pytest.raises(TypeError, match=msg):
+            df.A.append(df)
+        with pytest.raises(TypeError, match=msg):
+            df.A.append([df])


 class TestSeriesAppendWithDatetimeIndex:

From 9aaaf1b92aafb1f8d7c7cafa2ad87d84cd38ba24 Mon Sep 17 00:00:00 2001
From: Robert de Vries
Date: Wed, 4 Mar 2020 15:22:35 +0100
Subject: [PATCH 242/388] Fix BUG: overflow on pd.Timedelta(nanoseconds=)
 constructor (#32424)

---
 doc/source/whatsnew/v1.1.0.rst         | 2 +-
 pandas/_libs/tslibs/timedeltas.pyx     | 2 +-
 pandas/tests/tslibs/test_timedeltas.py | 6 ++++++
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 7c68005144cbc..5310419bfc100 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -234,7 +234,7 @@ Numeric
 Conversion
 ^^^^^^^^^^
 - Bug in :class:`Series` construction from NumPy array with big-endian ``datetime64`` dtype (:issue:`29684`)
--
+- Bug in :class:`Timedelta` construction with large nanoseconds keyword value (:issue:`32402`)
 -

 Strings
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
index 298028227e18b..7bd02b734beeb 100644
--- a/pandas/_libs/tslibs/timedeltas.pyx
+++ b/pandas/_libs/tslibs/timedeltas.pyx
@@ -1198,7 +1198,7 @@ class Timedelta(_Timedelta):

         kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs}

-        nano = np.timedelta64(kwargs.pop('nanoseconds', 0), 'ns')
+        nano = convert_to_timedelta64(kwargs.pop('nanoseconds', 0), 'ns')
         try:
             value = nano + convert_to_timedelta64(timedelta(**kwargs), 'ns')
diff --git a/pandas/tests/tslibs/test_timedeltas.py b/pandas/tests/tslibs/test_timedeltas.py
index 86d5cc749b5e1..c87752ccf151e 100644
--- a/pandas/tests/tslibs/test_timedeltas.py
+++ b/pandas/tests/tslibs/test_timedeltas.py
@@ -28,3 +28,9 @@ def test_delta_to_nanoseconds_error():

     with pytest.raises(TypeError, match=""):
         delta_to_nanoseconds(obj)
+
+
+def test_huge_nanoseconds_overflow():
+    # GH 32402
+    assert delta_to_nanoseconds(Timedelta(1e10)) == 1e10
+    assert delta_to_nanoseconds(Timedelta(nanoseconds=1e10)) == 1e10

From
86ed2b654abfe82adeba2bae7c4424377d428d91 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 4 Mar 2020 06:28:52 -0800 Subject: [PATCH 243/388] PERF: lazify blknos and blklocs (#32261) --- pandas/core/internals/concat.py | 8 +-- pandas/core/internals/managers.py | 59 +++++++++++++++----- pandas/tests/frame/indexing/test_indexing.py | 5 +- pandas/tests/frame/test_nonunique_indexes.py | 4 +- pandas/tests/internals/test_internals.py | 3 +- 5 files changed, 57 insertions(+), 22 deletions(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 515e1bcd761b6..7570f6eddbd9c 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -48,8 +48,8 @@ def get_mgr_concatenation_plan(mgr, indexers): if 0 in indexers: ax0_indexer = indexers.pop(0) - blknos = algos.take_1d(mgr._blknos, ax0_indexer, fill_value=-1) - blklocs = algos.take_1d(mgr._blklocs, ax0_indexer, fill_value=-1) + blknos = algos.take_1d(mgr.blknos, ax0_indexer, fill_value=-1) + blklocs = algos.take_1d(mgr.blklocs, ax0_indexer, fill_value=-1) else: if mgr._is_single_block: @@ -57,8 +57,8 @@ def get_mgr_concatenation_plan(mgr, indexers): return [(blk.mgr_locs, JoinUnit(blk, mgr_shape, indexers))] ax0_indexer = None - blknos = mgr._blknos - blklocs = mgr._blklocs + blknos = mgr.blknos + blklocs = mgr.blklocs plan = [] for blkno, placements in libinternals.get_blkno_placements(blknos, group=False): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index c0e32066a8a70..7320a6407b248 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -141,9 +141,37 @@ def __init__( if do_integrity_check: self._verify_integrity() + # Populate known_consolidate, blknos, and blklocs lazily self._known_consolidated = False + self._blknos = None + self._blklocs = None - self._rebuild_blknos_and_blklocs() + @property + def blknos(self): + """ + Suppose we want to find the array corresponding to our i'th column. + + blknos[i] identifies the block from self.blocks that contains this column. + + blklocs[i] identifies the column of interest within + self.blocks[self.blknos[i]] + """ + if self._blknos is None: + # Note: these can be altered by other BlockManager methods. + self._rebuild_blknos_and_blklocs() + + return self._blknos + + @property + def blklocs(self): + """ + See blknos.__doc__ + """ + if self._blklocs is None: + # Note: these can be altered by other BlockManager methods. + self._rebuild_blknos_and_blklocs() + + return self._blklocs def make_empty(self, axes=None) -> "BlockManager": """ return an empty BlockManager with the items axis of len 0 """ @@ -230,6 +258,7 @@ def _rebuild_blknos_and_blklocs(self) -> None: new_blklocs[rl.indexer] = np.arange(len(rl)) if (new_blknos == -1).any(): + # TODO: can we avoid this? it isn't cheap raise AssertionError("Gaps in blk ref_locs") self._blknos = new_blknos @@ -253,7 +282,7 @@ def get_dtype_counts(self): def get_dtypes(self): dtypes = np.array([blk.dtype for blk in self.blocks]) - return algos.take_1d(dtypes, self._blknos, allow_fill=False) + return algos.take_1d(dtypes, self.blknos, allow_fill=False) def __getstate__(self): block_values = [b.values for b in self.blocks] @@ -951,8 +980,8 @@ def iget(self, i: int) -> "SingleBlockManager": """ Return the data as a SingleBlockManager. 
""" - block = self.blocks[self._blknos[i]] - values = block.iget(self._blklocs[i]) + block = self.blocks[self.blknos[i]] + values = block.iget(self.blklocs[i]) # shortcut for select a single-dim from a 2-dim BM return SingleBlockManager( @@ -980,7 +1009,7 @@ def delete(self, item): else: affected_start = is_deleted.nonzero()[0][0] - for blkno, _ in _fast_count_smallints(self._blknos[affected_start:]): + for blkno, _ in _fast_count_smallints(self.blknos[affected_start:]): blk = self.blocks[blkno] bml = blk.mgr_locs blk_del = is_deleted[bml.indexer].nonzero()[0] @@ -1026,6 +1055,8 @@ def iset(self, loc: Union[int, slice, np.ndarray], value): """ # FIXME: refactor, clearly separate broadcasting & zip-like assignment # can prob also fix the various if tests for sparse/categorical + if self._blklocs is None and self.ndim > 1: + self._rebuild_blknos_and_blklocs() value_is_extension_type = is_extension_array_dtype(value) @@ -1055,8 +1086,9 @@ def value_getitem(placement): if isinstance(loc, int): loc = [loc] - blknos = self._blknos[loc] - blklocs = self._blklocs[loc].copy() + # Accessing public blknos ensures the public versions are initialized + blknos = self.blknos[loc] + blklocs = self.blklocs[loc].copy() unfit_mgr_locs = [] unfit_val_locs = [] @@ -1161,7 +1193,7 @@ def insert(self, loc: int, item, value, allow_duplicates: bool = False): block = make_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1)) - for blkno, count in _fast_count_smallints(self._blknos[loc:]): + for blkno, count in _fast_count_smallints(self.blknos[loc:]): blk = self.blocks[blkno] if count == len(blk.mgr_locs): blk.mgr_locs = blk.mgr_locs.add(1) @@ -1170,7 +1202,8 @@ def insert(self, loc: int, item, value, allow_duplicates: bool = False): new_mgr_locs[new_mgr_locs >= loc] += 1 blk.mgr_locs = new_mgr_locs - if loc == self._blklocs.shape[0]: + # Accessing public blklocs ensures the public versions are initialized + if loc == self.blklocs.shape[0]: # np.append is a lot faster, let's use it if we can. 
self._blklocs = np.append(self._blklocs, 0) self._blknos = np.append(self._blknos, len(self.blocks)) @@ -1301,14 +1334,14 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): ] if sl_type in ("slice", "mask"): - blknos = self._blknos[slobj] - blklocs = self._blklocs[slobj] + blknos = self.blknos[slobj] + blklocs = self.blklocs[slobj] else: blknos = algos.take_1d( - self._blknos, slobj, fill_value=-1, allow_fill=allow_fill + self.blknos, slobj, fill_value=-1, allow_fill=allow_fill ) blklocs = algos.take_1d( - self._blklocs, slobj, fill_value=-1, allow_fill=allow_fill + self.blklocs, slobj, fill_value=-1, allow_fill=allow_fill ) # When filling blknos, make sure blknos is updated before appending to diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 997414eceeb86..b0cb988720c25 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -2213,16 +2213,17 @@ def test_object_casting_indexing_wraps_datetimelike(): assert isinstance(ser.values[2], pd.Timedelta) mgr = df._data + mgr._rebuild_blknos_and_blklocs() arr = mgr.fast_xs(0) assert isinstance(arr[1], pd.Timestamp) assert isinstance(arr[2], pd.Timedelta) - blk = mgr.blocks[mgr._blknos[1]] + blk = mgr.blocks[mgr.blknos[1]] assert blk.dtype == "M8[ns]" # we got the right block val = blk.iget((0, 0)) assert isinstance(val, pd.Timestamp) - blk = mgr.blocks[mgr._blknos[2]] + blk = mgr.blocks[mgr.blknos[2]] assert blk.dtype == "m8[ns]" # we got the right block val = blk.iget((0, 0)) assert isinstance(val, pd.Timedelta) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 32ead406a3e86..233c0f4bd3544 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -474,8 +474,8 @@ def test_columns_with_dups(self): ) df = pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1) - assert len(df._data._blknos) == len(df.columns) - assert len(df._data._blklocs) == len(df.columns) + assert len(df._data.blknos) == len(df.columns) + assert len(df._data.blklocs) == len(df.columns) # testing iloc for i in range(len(df.columns)): diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 0b9d84d261708..a569726e9a22a 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -309,7 +309,8 @@ def test_duplicate_ref_loc_failure(self): msg = "Gaps in blk ref_locs" with pytest.raises(AssertionError, match=msg): - BlockManager(blocks, axes) + mgr = BlockManager(blocks, axes) + mgr._rebuild_blknos_and_blklocs() blocks[0].mgr_locs = np.array([0]) blocks[1].mgr_locs = np.array([1]) From 609b8da3506dd9793a150f59bb8691453d12c947 Mon Sep 17 00:00:00 2001 From: Rafif Date: Wed, 4 Mar 2020 22:41:31 +0700 Subject: [PATCH 244/388] Fix PR08, RT02, RT03, and SA01 on pandas.Index.fillna (#32355) --- pandas/core/indexes/base.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 173a83103b8ad..65f8b0fb67260 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2129,13 +2129,18 @@ def fillna(self, value=None, downcast=None): Scalar value to use to fill holes (e.g. 0). This value cannot be a list-likes. 
downcast : dict, default is None - a dict of item->dtype of what to downcast if possible, + A dict of item->dtype of what to downcast if possible, or the string 'infer' which will try to downcast to an appropriate equal type (e.g. float64 to int64 if possible). Returns ------- - filled : Index + Index + + See Also + -------- + DataFrame.fillna : Fill NaN values of a DataFrame. + Series.fillna : Fill NaN Values of a Series. """ self._assert_can_do_op(value) if self.hasnans: From bdb4a08307d809c722ae10d1778780ea07f95293 Mon Sep 17 00:00:00 2001 From: Farhan Reynaldo Date: Wed, 4 Mar 2020 23:22:04 +0700 Subject: [PATCH 245/388] DOC: Fix EX02 in pandas.Index.get_loc (#32429) --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 65f8b0fb67260..6f44b5abf5b04 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2849,7 +2849,7 @@ def get_loc(self, key, method=None, tolerance=None): >>> non_monotonic_index = pd.Index(list('abcb')) >>> non_monotonic_index.get_loc('b') - array([False, True, False, True], dtype=bool) + array([False, True, False, True]) """ if method is None: if tolerance is not None: From 690e382d2e93198c3629943607cef9fc3012dfd4 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Wed, 4 Mar 2020 11:11:37 -0800 Subject: [PATCH 246/388] CI: fix test_matplotlib_scatter_datetime64 (#32442) --- pandas/plotting/_matplotlib/compat.py | 1 + pandas/tests/plotting/common.py | 1 + pandas/tests/plotting/test_datetimelike.py | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/compat.py b/pandas/plotting/_matplotlib/compat.py index e7855068334f7..f2c5032112bc9 100644 --- a/pandas/plotting/_matplotlib/compat.py +++ b/pandas/plotting/_matplotlib/compat.py @@ -20,3 +20,4 @@ def inner(): _mpl_ge_2_2_3 = _mpl_version("2.2.3", operator.ge) _mpl_ge_3_0_0 = _mpl_version("3.0.0", operator.ge) _mpl_ge_3_1_0 = _mpl_version("3.1.0", operator.ge) +_mpl_ge_3_2_0 = _mpl_version("3.2.0", operator.ge) diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index ea0ec8ad98ffe..75b825687209c 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -34,6 +34,7 @@ def setup_method(self, method): self.mpl_ge_2_2_3 = compat._mpl_ge_2_2_3() self.mpl_ge_3_0_0 = compat._mpl_ge_3_0_0() self.mpl_ge_3_1_0 = compat._mpl_ge_3_1_0() + self.mpl_ge_3_2_0 = compat._mpl_ge_3_2_0() self.bp_n_objects = 7 self.polycollection_factor = 2 diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 979b89a87d843..b85a2affc4e4b 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -1468,7 +1468,9 @@ def test_matplotlib_scatter_datetime64(self): ax.scatter(x="time", y="y", data=df) self.plt.draw() label = ax.get_xticklabels()[0] - if self.mpl_ge_3_0_0: + if self.mpl_ge_3_2_0: + expected = "2018-01-01" + elif self.mpl_ge_3_0_0: expected = "2017-12-08" else: expected = "2017-12-12" From c5f11ab79e5553a28a91fc7036c8dcbfc8cbc697 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Wed, 4 Mar 2020 14:24:38 -0800 Subject: [PATCH 247/388] CI: mypy fixup for #32261 (#32438) --- pandas/core/internals/managers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 7320a6407b248..f6e79a0f2045d 100644 --- a/pandas/core/internals/managers.py +++ 
b/pandas/core/internals/managers.py @@ -122,6 +122,9 @@ class BlockManager(PandasObject): "_blklocs", ] + _blknos: np.ndarray + _blklocs: np.ndarray + def __init__( self, blocks: Sequence[Block], From d4cc3ccff3efd9bb69f8611fc7e24906114d8b30 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 5 Mar 2020 10:10:34 +0000 Subject: [PATCH 248/388] CI: ax.rowNum and ax.colNum attributes deprecated in Matplotlib 3.2 (#32444) --- pandas/plotting/_matplotlib/tools.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index 5743288982da4..08d945f679810 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -9,6 +9,8 @@ from pandas.core.dtypes.common import is_list_like from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries +from pandas.plotting._matplotlib import compat + def format_date_labels(ax, rot): # mini version of autofmt_xdate @@ -288,6 +290,12 @@ def _remove_labels_from_axis(axis): def _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey): if nplots > 1: + if compat._mpl_ge_3_2_0(): + row_num = lambda x: x.get_subplotspec().rowspan.start + col_num = lambda x: x.get_subplotspec().colspan.start + else: + row_num = lambda x: x.rowNum + col_num = lambda x: x.colNum if nrows > 1: try: @@ -295,13 +303,13 @@ def _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey): # so that we can correctly handle 'gaps" layout = np.zeros((nrows + 1, ncols + 1), dtype=np.bool) for ax in axarr: - layout[ax.rowNum, ax.colNum] = ax.get_visible() + layout[row_num(ax), col_num(ax)] = ax.get_visible() for ax in axarr: # only the last row of subplots should get x labels -> all # other off layout handles the case that the subplot is # the last in the column, because below is no subplot/gap. - if not layout[ax.rowNum + 1, ax.colNum]: + if not layout[row_num(ax) + 1, col_num(ax)]: continue if sharex or len(ax.get_shared_x_axes().get_siblings(ax)) > 1: _remove_labels_from_axis(ax.xaxis) From 79c9ec938b9bfabd9d0eec15d01942c3ea113649 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 5 Mar 2020 10:49:41 +0000 Subject: [PATCH 249/388] CLN: avoid _internal_get_values in groupby.generic (#32427) --- pandas/core/groupby/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index fb935c9065b83..ac522fc7863b2 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -589,7 +589,7 @@ def nunique(self, dropna: bool = True) -> Series: """ ids, _, _ = self.grouper.group_info - val = self.obj._internal_get_values() + val = self.obj._values codes, _ = algorithms.factorize(val, sort=False) sorter = np.lexsort((codes, ids)) @@ -657,7 +657,7 @@ def value_counts( ) ids, _, _ = self.grouper.group_info - val = self.obj._internal_get_values() + val = self.obj._values # groupby removes null keys from groupings mask = ids != -1 @@ -774,7 +774,7 @@ def count(self) -> Series: Count of values within each group. 
""" ids, _, ngroups = self.grouper.group_info - val = self.obj._internal_get_values() + val = self.obj._values mask = (ids != -1) & ~isna(val) ids = ensure_platform_int(ids) From f7bed0583556f66f18f6391eddf3e0802c4933f3 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 5 Mar 2020 11:23:33 +0000 Subject: [PATCH 250/388] DOC: correct issue number for PR #32424 (#32436) --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 5310419bfc100..44deab25db695 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -234,7 +234,7 @@ Numeric Conversion ^^^^^^^^^^ - Bug in :class:`Series` construction from NumPy array with big-endian ``datetime64`` dtype (:issue:`29684`) -- Bug in :class:`Timedelta` construction with large nanoseconds keyword value (:issue:`34202`) +- Bug in :class:`Timedelta` construction with large nanoseconds keyword value (:issue:`32402`) - Strings From 8914c01188346ee61e5dc10fec9cd5a0ad441a44 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 6 Mar 2020 07:12:47 +0000 Subject: [PATCH 251/388] TYP/CLN: Optional[Hashable] -> pandas._typing.Label (#32371) --- pandas/core/frame.py | 12 ++++++------ pandas/core/generic.py | 2 +- pandas/core/indexes/base.py | 4 ++-- pandas/core/reshape/concat.py | 16 ++++++---------- pandas/core/series.py | 6 +----- pandas/io/pytables.py | 18 ++++-------------- 6 files changed, 20 insertions(+), 38 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 61641bfb24293..60d89b2076e92 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -890,7 +890,7 @@ def style(self) -> "Styler": """ @Appender(_shared_docs["items"]) - def items(self) -> Iterable[Tuple[Optional[Hashable], Series]]: + def items(self) -> Iterable[Tuple[Label, Series]]: if self.columns.is_unique and hasattr(self, "_item_cache"): for k in self.columns: yield k, self._get_item_cache(k) @@ -899,10 +899,10 @@ def items(self) -> Iterable[Tuple[Optional[Hashable], Series]]: yield k, self._ixs(i, axis=1) @Appender(_shared_docs["items"]) - def iteritems(self) -> Iterable[Tuple[Optional[Hashable], Series]]: + def iteritems(self) -> Iterable[Tuple[Label, Series]]: yield from self.items() - def iterrows(self) -> Iterable[Tuple[Optional[Hashable], Series]]: + def iterrows(self) -> Iterable[Tuple[Label, Series]]: """ Iterate over DataFrame rows as (index, Series) pairs. @@ -4043,7 +4043,7 @@ def set_index( "one-dimensional arrays." ) - missing: List[Optional[Hashable]] = [] + missing: List[Label] = [] for col in keys: if isinstance( col, (ABCIndexClass, ABCSeries, np.ndarray, list, abc.Iterator) @@ -4082,7 +4082,7 @@ def set_index( else: arrays.append(self.index) - to_remove: List[Optional[Hashable]] = [] + to_remove: List[Label] = [] for col in keys: if isinstance(col, ABCMultiIndex): for n in range(col.nlevels): @@ -4137,7 +4137,7 @@ def reset_index( drop: bool = False, inplace: bool = False, col_level: Hashable = 0, - col_fill: Optional[Hashable] = "", + col_fill: Label = "", ) -> Optional["DataFrame"]: """ Reset the index, or a level of it. 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f0147859cae97..866b58c1ffe3d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9729,7 +9729,7 @@ def describe_1d(data): ldesc = [describe_1d(s) for _, s in data.items()] # set a convenient order for rows - names: List[Optional[Hashable]] = [] + names: List[Label] = [] ldesc_indexes = sorted((x.index for x in ldesc), key=len) for idxnames in ldesc_indexes: for name in idxnames: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6f44b5abf5b04..7e391e7a03fbb 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1,7 +1,7 @@ from datetime import datetime import operator from textwrap import dedent -from typing import TYPE_CHECKING, Any, FrozenSet, Hashable, Optional, Union +from typing import TYPE_CHECKING, Any, FrozenSet, Hashable, Union import warnings import numpy as np @@ -5546,7 +5546,7 @@ def default_index(n): return RangeIndex(0, n, name=None) -def maybe_extract_name(name, obj, cls) -> Optional[Hashable]: +def maybe_extract_name(name, obj, cls) -> Label: """ If no name is passed, then extract it from data, validating hashability. """ diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 06a180d4a096e..091129707228f 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -2,11 +2,11 @@ Concat routines. """ -from typing import Hashable, Iterable, List, Mapping, Optional, Union, overload +from typing import Iterable, List, Mapping, Union, overload import numpy as np -from pandas._typing import FrameOrSeriesUnion +from pandas._typing import FrameOrSeriesUnion, Label from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries @@ -32,7 +32,7 @@ @overload def concat( - objs: Union[Iterable["DataFrame"], Mapping[Optional[Hashable], "DataFrame"]], + objs: Union[Iterable["DataFrame"], Mapping[Label, "DataFrame"]], axis=0, join: str = "outer", ignore_index: bool = False, @@ -48,9 +48,7 @@ def concat( @overload def concat( - objs: Union[ - Iterable[FrameOrSeriesUnion], Mapping[Optional[Hashable], FrameOrSeriesUnion] - ], + objs: Union[Iterable[FrameOrSeriesUnion], Mapping[Label, FrameOrSeriesUnion]], axis=0, join: str = "outer", ignore_index: bool = False, @@ -65,9 +63,7 @@ def concat( def concat( - objs: Union[ - Iterable[FrameOrSeriesUnion], Mapping[Optional[Hashable], FrameOrSeriesUnion] - ], + objs: Union[Iterable[FrameOrSeriesUnion], Mapping[Label, FrameOrSeriesUnion]], axis=0, join="outer", ignore_index: bool = False, @@ -536,7 +532,7 @@ def _get_concat_axis(self) -> Index: idx = ibase.default_index(len(self.objs)) return idx elif self.keys is None: - names: List[Optional[Hashable]] = [None] * len(self.objs) + names: List[Label] = [None] * len(self.objs) num = 0 has_names = False for i, x in enumerate(self.objs): diff --git a/pandas/core/series.py b/pandas/core/series.py index 12164a4b8ff6b..bb4e222193608 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -692,11 +692,7 @@ def __array_ufunc__( inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs) result = getattr(ufunc, method)(*inputs, **kwargs) - name: Label - if len(set(names)) == 1: - name = names[0] - else: - name = None + name = names[0] if len(set(names)) == 1 else None def construct_return(result): if lib.is_scalar(result): diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 168666ea21f45..7aeed5c316d7f 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -8,17 +8,7 @@ import itertools 
import os import re -from typing import ( - TYPE_CHECKING, - Any, - Dict, - Hashable, - List, - Optional, - Tuple, - Type, - Union, -) +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union import warnings import numpy as np @@ -27,7 +17,7 @@ from pandas._libs import lib, writers as libwriters from pandas._libs.tslibs import timezones -from pandas._typing import ArrayLike, FrameOrSeries +from pandas._typing import ArrayLike, FrameOrSeries, Label from pandas.compat._optional import import_optional_dependency from pandas.errors import PerformanceWarning from pandas.util._decorators import cache_readonly @@ -2811,7 +2801,7 @@ def read_multi_index( levels = [] codes = [] - names: List[Optional[Hashable]] = [] + names: List[Label] = [] for i in range(nlevels): level_key = f"{key}_level{i}" node = getattr(self.group, level_key) @@ -2976,7 +2966,7 @@ class SeriesFixed(GenericFixed): pandas_kind = "series" attributes = ["name"] - name: Optional[Hashable] + name: Label @property def shape(self): From f6b3e82b5514da97b48918c99bd28a07b754291b Mon Sep 17 00:00:00 2001 From: Farhan Reynaldo Date: Fri, 6 Mar 2020 19:30:36 +0700 Subject: [PATCH 252/388] DOC: Fix errors in pandas.Series.argmin (#32286) Co-authored-by: Farhan Reynaldo Hutabarat Co-authored-by: Simon Hawkins --- pandas/core/base.py | 42 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index f55d9f905945d..508582540e169 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -927,16 +927,17 @@ def max(self, axis=None, skipna=True, *args, **kwargs): nv.validate_max(args, kwargs) return nanops.nanmax(self._values, skipna=skipna) + @doc(op="max", oppose="min", value="largest") def argmax(self, axis=None, skipna=True, *args, **kwargs): """ - Return int position of the largest value in the Series. + Return int position of the {value} value in the Series. - If the maximum is achieved in multiple locations, + If the {op}imum is achieved in multiple locations, the first row position is returned. Parameters ---------- - axis : {None} + axis : {{None}} Dummy argument for consistency with Series. skipna : bool, default True Exclude NA/null values when showing the result. @@ -946,12 +947,13 @@ def argmax(self, axis=None, skipna=True, *args, **kwargs): Returns ------- int - Row position of the maximum values. + Row position of the {op}imum value. See Also -------- - numpy.ndarray.argmax : Equivalent method for numpy arrays. - Series.argmin : Similar method, but returning the minimum. + Series.arg{op} : Return position of the {op}imum value. + Series.arg{oppose} : Return position of the {oppose}imum value. + numpy.ndarray.arg{op} : Equivalent method for numpy arrays. Series.idxmax : Return index label of the maximum values. Series.idxmin : Return index label of the minimum values. @@ -959,8 +961,8 @@ def argmax(self, axis=None, skipna=True, *args, **kwargs): -------- Consider dataset containing cereal calories - >>> s = pd.Series({'Corn Flakes': 100.0, 'Almond Delight': 110.0, - ... 'Cinnamon Toast Crunch': 120.0, 'Cocoa Puff': 110.0}) + >>> s = pd.Series({{'Corn Flakes': 100.0, 'Almond Delight': 110.0, + ... 
'Cinnamon Toast Crunch': 120.0, 'Cocoa Puff': 110.0}}) >>> s Corn Flakes 100.0 Almond Delight 110.0 @@ -970,8 +972,11 @@ def argmax(self, axis=None, skipna=True, *args, **kwargs): >>> s.argmax() 2 + >>> s.argmin() + 0 - The maximum cereal calories is in the third element, + The maximum cereal calories is the third element and + the minimum cereal calories is the first element, since series is zero-indexed. """ nv.validate_minmax_axis(axis) @@ -1019,25 +1024,8 @@ def min(self, axis=None, skipna=True, *args, **kwargs): nv.validate_min(args, kwargs) return nanops.nanmin(self._values, skipna=skipna) + @doc(argmax, op="min", oppose="max", value="smallest") def argmin(self, axis=None, skipna=True, *args, **kwargs): - """ - Return a ndarray of the minimum argument indexer. - - Parameters - ---------- - axis : {None} - Dummy argument for consistency with Series. - skipna : bool, default True - - Returns - ------- - numpy.ndarray - - See Also - -------- - numpy.ndarray.argmin : Return indices of the minimum values along - the given axis. - """ nv.validate_minmax_axis(axis) nv.validate_argmax_with_skipna(skipna, args, kwargs) return nanops.nanargmin(self._values, skipna=skipna) From 015c1c154b9bc70d3b1981ba1dc31c09676065bf Mon Sep 17 00:00:00 2001 From: Ashwin Prakash Nalwade <53330017+ashwinpn@users.noreply.github.com> Date: Fri, 6 Mar 2020 10:25:32 -0500 Subject: [PATCH 253/388] Made apt changes to the documentation for pandas.Series.str.replace() (#32478) --- pandas/core/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 3be9c5fcdfb26..c4bdbaff1649c 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -572,7 +572,7 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): r""" Replace occurrences of pattern/regex in the Series/Index with some other string. Equivalent to :meth:`str.replace` or - :func:`re.sub`. + :func:`re.sub`, depending on the regex value. 
Parameters ---------- From 96a40ed9bad308b0e80fcc93e463d0b2b470fffa Mon Sep 17 00:00:00 2001 From: zaki-indra <55521899+zaki-indra@users.noreply.github.com> Date: Sat, 7 Mar 2020 00:00:09 +0700 Subject: [PATCH 254/388] Fixing RT02 pandas.Index.dropna and PR08 pandas.Index.fillna (#32359) --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7e391e7a03fbb..5555b99a0ef88 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2163,7 +2163,7 @@ def dropna(self, how="any"): Returns ------- - valid : Index + Index """ if how not in ("any", "all"): raise ValueError(f"invalid how option: {how}") From 54c5e9e6e0819f848a10045d1cbc8ee936ffd075 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Fri, 6 Mar 2020 21:01:11 +0200 Subject: [PATCH 255/388] CLN: imports in pandas/io/excel/_base.py (#32481) Co-authored-by: MomIsBestFriend <> --- pandas/io/excel/_base.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index d2f9dd285582f..f98d9501f1f73 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1,5 +1,5 @@ import abc -from datetime import date, datetime, timedelta +import datetime from io import BytesIO import os from textwrap import fill @@ -28,7 +28,6 @@ _pop_header_name, get_writer, ) -from pandas.io.formats.printing import pprint_thing from pandas.io.parsers import TextParser _read_excel_doc = ( @@ -742,11 +741,11 @@ def _value_with_fmt(self, val): val = float(val) elif is_bool(val): val = bool(val) - elif isinstance(val, datetime): + elif isinstance(val, datetime.datetime): fmt = self.datetime_format - elif isinstance(val, date): + elif isinstance(val, datetime.date): fmt = self.date_format - elif isinstance(val, timedelta): + elif isinstance(val, datetime.timedelta): val = val.total_seconds() / float(86400) fmt = "0" else: @@ -763,9 +762,7 @@ def check_extension(cls, ext): if ext.startswith("."): ext = ext[1:] if not any(ext in extension for extension in cls.supported_extensions): - msg = "Invalid extension for engine" - f"'{pprint_thing(cls.engine)}': '{pprint_thing(ext)}'" - raise ValueError(msg) + raise ValueError(f"Invalid extension for engine '{cls.engine}': '{ext}'") else: return True From 970499d9660a4f4a1df3e1f8ce3015336ffb4d41 Mon Sep 17 00:00:00 2001 From: Red Date: Sat, 7 Mar 2020 03:48:14 +0700 Subject: [PATCH 256/388] fix errors docstrings pandas.to_numeric (#32354) --- pandas/core/tools/numeric.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 40f376724bd39..7d1e4bbd8fb05 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -35,6 +35,7 @@ def to_numeric(arg, errors="raise", downcast=None): Parameters ---------- arg : scalar, list, tuple, 1-d array, or Series + Argument to be converted. errors : {'ignore', 'raise', 'coerce'}, default 'raise' - If 'raise', then invalid parsing will raise an exception. - If 'coerce', then invalid parsing will be set as NaN. @@ -61,7 +62,8 @@ def to_numeric(arg, errors="raise", downcast=None): Returns ------- - ret : numeric if parsing succeeded. + ret + Numeric if parsing succeeded. Return type depends on input. Series if Series, otherwise ndarray. 
See Also From 45d412fcfa1730bcf4e24a598fcdb53ff7db07d0 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Sat, 7 Mar 2020 10:40:09 +0100 Subject: [PATCH 257/388] CI: Fix flaky test_value_counts_null (#32449) --- pandas/tests/base/test_ops.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index 8f48d0a3e8378..f1cc98a1b773d 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -311,9 +311,10 @@ def test_value_counts(self, index_or_series_obj): if isinstance(obj, pd.MultiIndex): expected.index = pd.Index(expected.index) - # sort_index to avoid switched order when values share the same count - result = result.sort_index() - expected = expected.sort_index() + # TODO: Order of entries with the same count is inconsistent on CI (gh-32449) + if obj.duplicated().any(): + result = result.sort_index() + expected = expected.sort_index() tm.assert_series_equal(result, expected) @pytest.mark.parametrize("null_obj", [np.nan, None]) @@ -344,13 +345,26 @@ def test_value_counts_null(self, null_obj, index_or_series_obj): expected = pd.Series(dict(counter.most_common()), dtype=np.int64) expected.index = expected.index.astype(obj.dtype) - tm.assert_series_equal(obj.value_counts(), expected) + result = obj.value_counts() + if obj.duplicated().any(): + # TODO: + # Order of entries with the same count is inconsistent on CI (gh-32449) + expected = expected.sort_index() + result = result.sort_index() + tm.assert_series_equal(result, expected) # can't use expected[null_obj] = 3 as # IntervalIndex doesn't allow assignment new_entry = pd.Series({np.nan: 3}, dtype=np.int64) expected = expected.append(new_entry) - tm.assert_series_equal(obj.value_counts(dropna=False), expected) + + result = obj.value_counts(dropna=False) + if obj.duplicated().any(): + # TODO: + # Order of entries with the same count is inconsistent on CI (gh-32449) + expected = expected.sort_index() + result = result.sort_index() + tm.assert_series_equal(result, expected) def test_value_counts_inferred(self, index_or_series): klass = index_or_series From 2e0e013703390377faad57ee97f2cfaf98ba039e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Mar 2020 02:31:16 -0800 Subject: [PATCH 258/388] CLN: assorted cleanups, annotations (#32475) --- pandas/_libs/tslibs/timedeltas.pyx | 2 +- pandas/core/arrays/datetimes.py | 2 +- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/indexes/accessors.py | 15 ++++++++++----- pandas/core/indexes/range.py | 2 +- pandas/core/internals/managers.py | 6 +++--- pandas/core/nanops.py | 4 ++-- pandas/core/series.py | 4 ---- pandas/tests/frame/indexing/test_datetime.py | 2 +- pandas/tseries/frequencies.py | 7 ++----- setup.cfg | 1 + 11 files changed, 23 insertions(+), 24 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 7bd02b734beeb..457f3eb0749c2 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1167,7 +1167,7 @@ class Timedelta(_Timedelta): Possible values: - * 'Y', 'M', 'W', 'D', 'T', 'S', 'L', 'U', or 'N' + * 'W', 'D', 'T', 'S', 'L', 'U', or 'N' * 'days' or 'day' * 'hours', 'hour', 'hr', or 'h' * 'minutes', 'minute', 'min', or 'm' diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 56939cda6d21c..9f19c7ba0be6e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -988,7 +988,7 @@ def tz_localize(self, tz, ambiguous="raise", 
nonexistent="raise"): # ---------------------------------------------------------------- # Conversion Methods - Vectorized analogues of Timestamp methods - def to_pydatetime(self): + def to_pydatetime(self) -> np.ndarray: """ Return Datetime Array/Index as object ndarray of datetime.datetime objects. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 749489a0a04fb..dbc0b0b3ccbbf 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -825,7 +825,7 @@ def total_seconds(self): """ return self._maybe_mask_results(1e-9 * self.asi8, fill_value=None) - def to_pytimedelta(self): + def to_pytimedelta(self) -> np.ndarray: """ Return Timedelta Array/Index as object ndarray of datetime.timedelta objects. diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index db774a03c02f8..71ae92df1970b 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -1,6 +1,8 @@ """ datetimelike delegation """ +from typing import TYPE_CHECKING + import numpy as np from pandas.core.dtypes.common import ( @@ -21,9 +23,12 @@ from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex +if TYPE_CHECKING: + from pandas import Series # noqa:F401 + class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin): - def __init__(self, data, orig): + def __init__(self, data: "Series", orig): if not isinstance(data, ABCSeries): raise TypeError( f"cannot convert an object of type {type(data)} to a datetimelike index" @@ -137,7 +142,7 @@ class DatetimeProperties(Properties): Raises TypeError if the Series does not contain datetimelike values. """ - def to_pydatetime(self): + def to_pydatetime(self) -> np.ndarray: """ Return the data as an array of native Python datetime objects. @@ -209,7 +214,7 @@ class TimedeltaProperties(Properties): Raises TypeError if the Series does not contain datetimelike values. """ - def to_pytimedelta(self): + def to_pytimedelta(self) -> np.ndarray: """ Return an array of native `datetime.timedelta` objects. @@ -271,7 +276,7 @@ def components(self): 2 0 0 0 2 0 0 0 3 0 0 0 3 0 0 0 4 0 0 0 4 0 0 0 - """ # noqa: E501 + """ return self._get_values().components.set_index(self._parent.index) @property @@ -303,7 +308,7 @@ class PeriodProperties(Properties): class CombinedDatetimelikeProperties( DatetimeProperties, TimedeltaProperties, PeriodProperties ): - def __new__(cls, data): + def __new__(cls, data: "Series"): # CombinedDatetimelikeProperties isn't really instantiated. Instead # we need to choose which parent (datetime or timedelta) is # appropriate. 
Since we're checking the dtypes anyway, we'll just diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index f621a3c153adf..c21d8df2476b3 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -168,7 +168,7 @@ def _data(self): return self._cached_data @cache_readonly - def _int64index(self): + def _int64index(self) -> Int64Index: return Int64Index._simple_new(self._data, name=self.name) def _get_data_as_items(self): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index f6e79a0f2045d..98afc5ac3a0e3 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -24,7 +24,6 @@ is_list_like, is_numeric_v_string_like, is_scalar, - is_sparse, ) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.dtypes import ExtensionDtype @@ -32,6 +31,7 @@ from pandas.core.dtypes.missing import isna import pandas.core.algorithms as algos +from pandas.core.arrays.sparse import SparseDtype from pandas.core.base import PandasObject from pandas.core.indexers import maybe_convert_indices from pandas.core.indexes.api import Index, MultiIndex, ensure_index @@ -843,8 +843,8 @@ def _interleave(self) -> np.ndarray: # TODO: https://github.com/pandas-dev/pandas/issues/22791 # Give EAs some input on what happens here. Sparse needs this. - if is_sparse(dtype): - dtype = dtype.subtype # type: ignore + if isinstance(dtype, SparseDtype): + dtype = dtype.subtype elif is_extension_array_dtype(dtype): dtype = "object" diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 4398a1569ac56..725d1adc6d161 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -981,7 +981,7 @@ def nanskew( Examples -------- >>> import pandas.core.nanops as nanops - >>> s = pd.Series([1,np.nan, 1, 2]) + >>> s = pd.Series([1, np.nan, 1, 2]) >>> nanops.nanskew(s) 1.7320508075688787 """ @@ -1065,7 +1065,7 @@ def nankurt( Examples -------- >>> import pandas.core.nanops as nanops - >>> s = pd.Series([1,np.nan, 1, 3, 2]) + >>> s = pd.Series([1, np.nan, 1, 3, 2]) >>> nanops.nankurt(s) -1.2892561983471076 """ diff --git a/pandas/core/series.py b/pandas/core/series.py index bb4e222193608..cfdd5635b60a9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -47,7 +47,6 @@ ABCMultiIndex, ABCPeriodIndex, ABCSeries, - ABCSparseArray, ) from pandas.core.dtypes.inference import is_hashable from pandas.core.dtypes.missing import ( @@ -289,9 +288,6 @@ def __init__( pass elif isinstance(data, (set, frozenset)): raise TypeError(f"'{type(data).__name__}' type is unordered") - elif isinstance(data, ABCSparseArray): - # handle sparse passed here (and force conversion) - data = data.to_dense() else: data = com.maybe_iterable_to_list(data) diff --git a/pandas/tests/frame/indexing/test_datetime.py b/pandas/tests/frame/indexing/test_datetime.py index 6bfcac3793584..0fd60c151b9c4 100644 --- a/pandas/tests/frame/indexing/test_datetime.py +++ b/pandas/tests/frame/indexing/test_datetime.py @@ -40,7 +40,7 @@ def test_set_reset(self): # set/reset df = DataFrame({"A": [0, 1, 2]}, index=idx) result = df.reset_index() - assert result["foo"].dtype, "M8[ns, US/Eastern" + assert result["foo"].dtype == "datetime64[ns, US/Eastern]" df = result.set_index("foo") tm.assert_index_equal(df.index, idx) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 1a1b7e8e1bd08..0426351310e36 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -4,7 +4,6 @@ import warnings import 
numpy as np -from pytz import AmbiguousTimeError from pandas._libs.algos import unique_deltas from pandas._libs.tslibs import Timedelta, Timestamp @@ -288,10 +287,7 @@ def infer_freq(index, warn: bool = True) -> Optional[str]: index = index.values if not isinstance(index, pd.DatetimeIndex): - try: - index = pd.DatetimeIndex(index) - except AmbiguousTimeError: - index = pd.DatetimeIndex(index.asi8) + index = pd.DatetimeIndex(index) inferer = _FrequencyInferer(index, warn=warn) return inferer.get_freq() @@ -490,6 +486,7 @@ def _is_business_daily(self) -> bool: ) def _get_wom_rule(self) -> Optional[str]: + # FIXME: dont leave commented-out # wdiffs = unique(np.diff(self.index.week)) # We also need -47, -49, -48 to catch index spanning year boundary # if not lib.ismember(wdiffs, set([4, 5, -47, -49, -48])).all(): diff --git a/setup.cfg b/setup.cfg index bbd8489622005..42c507a2b6b01 100644 --- a/setup.cfg +++ b/setup.cfg @@ -98,6 +98,7 @@ exclude_lines = # Don't complain if non-runnable code isn't run: if 0: if __name__ == .__main__.: + if TYPE_CHECKING: [coverage:html] directory = coverage_html_report From 721739339597d6ddd236bc056b50a86bde3d6040 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Mar 2020 02:33:23 -0800 Subject: [PATCH 259/388] CLN: remove is_period_arraylike, is_datetime_arraylike (#32406) --- pandas/core/dtypes/common.py | 76 +----------------------------- pandas/core/generic.py | 3 +- pandas/core/indexes/accessors.py | 15 ++---- pandas/tests/dtypes/test_common.py | 12 ----- pandas/tseries/frequencies.py | 4 +- 5 files changed, 9 insertions(+), 101 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index df5bac1071985..1afe7edf2641b 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -7,7 +7,7 @@ import numpy as np -from pandas._libs import algos, lib +from pandas._libs import algos from pandas._libs.tslibs import conversion from pandas._typing import ArrayLike, DtypeObj @@ -19,14 +19,7 @@ PeriodDtype, registry, ) -from pandas.core.dtypes.generic import ( - ABCCategorical, - ABCDatetimeIndex, - ABCIndexClass, - ABCPeriodArray, - ABCPeriodIndex, - ABCSeries, -) +from pandas.core.dtypes.generic import ABCCategorical, ABCIndexClass from pandas.core.dtypes.inference import ( # noqa:F401 is_array_like, is_bool, @@ -606,71 +599,6 @@ def is_excluded_dtype(dtype) -> bool: return _is_dtype(arr_or_dtype, condition) -def is_period_arraylike(arr) -> bool: - """ - Check whether an array-like is a periodical array-like or PeriodIndex. - - Parameters - ---------- - arr : array-like - The array-like to check. - - Returns - ------- - boolean - Whether or not the array-like is a periodical array-like or - PeriodIndex instance. - - Examples - -------- - >>> is_period_arraylike([1, 2, 3]) - False - >>> is_period_arraylike(pd.Index([1, 2, 3])) - False - >>> is_period_arraylike(pd.PeriodIndex(["2017-01-01"], freq="D")) - True - """ - if isinstance(arr, (ABCPeriodIndex, ABCPeriodArray)): - return True - elif isinstance(arr, (np.ndarray, ABCSeries)): - return is_period_dtype(arr.dtype) - return getattr(arr, "inferred_type", None) == "period" - - -def is_datetime_arraylike(arr) -> bool: - """ - Check whether an array-like is a datetime array-like or DatetimeIndex. - - Parameters - ---------- - arr : array-like - The array-like to check. - - Returns - ------- - boolean - Whether or not the array-like is a datetime array-like or - DatetimeIndex. 
- - Examples - -------- - >>> is_datetime_arraylike([1, 2, 3]) - False - >>> is_datetime_arraylike(pd.Index([1, 2, 3])) - False - >>> is_datetime_arraylike(pd.DatetimeIndex([1, 2, 3])) - True - """ - if isinstance(arr, ABCDatetimeIndex): - return True - elif isinstance(arr, (np.ndarray, ABCSeries)): - return ( - is_object_dtype(arr.dtype) - and lib.infer_dtype(arr, skipna=False) == "datetime" - ) - return getattr(arr, "inferred_type", None) == "datetime" - - def is_dtype_equal(source, target) -> bool: """ Check if two dtypes are equal. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 866b58c1ffe3d..f7196073162df 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -72,7 +72,6 @@ is_number, is_numeric_dtype, is_object_dtype, - is_period_arraylike, is_re_compilable, is_scalar, is_timedelta64_dtype, @@ -1342,7 +1341,7 @@ def __neg__(self): def __pos__(self): values = self._values - if is_bool_dtype(values) or is_period_arraylike(values): + if is_bool_dtype(values): arr = values elif ( is_numeric_dtype(values) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 71ae92df1970b..8cfe1f4ac469c 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -9,10 +9,9 @@ is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, - is_datetime_arraylike, is_integer_dtype, is_list_like, - is_period_arraylike, + is_period_dtype, is_timedelta64_dtype, ) from pandas.core.dtypes.generic import ABCSeries @@ -50,12 +49,8 @@ def _get_values(self): elif is_timedelta64_dtype(data.dtype): return TimedeltaIndex(data, copy=False, name=self.name) - else: - if is_period_arraylike(data): - # TODO: use to_period_array - return PeriodArray(data, copy=False) - if is_datetime_arraylike(data): - return DatetimeIndex(data, copy=False, name=self.name) + elif is_period_dtype(data): + return PeriodArray(data, copy=False) raise TypeError( f"cannot convert an object of type {type(data)} to a datetimelike index" @@ -335,9 +330,7 @@ def __new__(cls, data: "Series"): return DatetimeProperties(data, orig) elif is_timedelta64_dtype(data.dtype): return TimedeltaProperties(data, orig) - elif is_period_arraylike(data): + elif is_period_dtype(data): return PeriodProperties(data, orig) - elif is_datetime_arraylike(data): - return DatetimeProperties(data, orig) raise AttributeError("Can only use .dt accessor with datetimelike values") diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 8da2797835080..66bf696cbe912 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -281,18 +281,6 @@ def test_is_string_dtype(): assert com.is_string_dtype(pd.array(["a", "b"], dtype="string")) -def test_is_period_arraylike(): - assert not com.is_period_arraylike([1, 2, 3]) - assert not com.is_period_arraylike(pd.Index([1, 2, 3])) - assert com.is_period_arraylike(pd.PeriodIndex(["2017-01-01"], freq="D")) - - -def test_is_datetime_arraylike(): - assert not com.is_datetime_arraylike([1, 2, 3]) - assert not com.is_datetime_arraylike(pd.Index([1, 2, 3])) - assert com.is_datetime_arraylike(pd.DatetimeIndex([1, 2, 3])) - - integer_dtypes: List = [] diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 0426351310e36..4af516af8a28e 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -19,7 +19,7 @@ from pandas.core.dtypes.common import ( is_datetime64_dtype, - is_period_arraylike, + is_period_dtype, is_timedelta64_dtype, ) from 
pandas.core.dtypes.generic import ABCSeries @@ -269,7 +269,7 @@ def infer_freq(index, warn: bool = True) -> Optional[str]: index = values inferer: _FrequencyInferer - if is_period_arraylike(index): + if is_period_dtype(index): raise TypeError( "PeriodIndex given. Check the `freq` attribute " "instead of using infer_freq." From 777c0f90c6067c636fcd76ce003a8fbfcc311d7b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Mar 2020 03:02:47 -0800 Subject: [PATCH 260/388] CLN: remove unreachable _internal_get_values in blocks (#32472) --- pandas/core/internals/blocks.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 70fd3ecdc2098..c088b7020927b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -3144,14 +3144,15 @@ def _safe_reshape(arr, new_shape): return arr -def _putmask_smart(v, mask, n): +def _putmask_smart(v: np.ndarray, mask: np.ndarray, n) -> np.ndarray: """ Return a new ndarray, try to preserve dtype if possible. Parameters ---------- - v : `values`, updated in-place (array like) - mask : np.ndarray + v : np.ndarray + `values`, updated in-place. + mask : np.ndarray[bool] Applies to both sides (array like). n : `new values` either scalar or an array like aligned with `values` @@ -3218,9 +3219,6 @@ def _putmask_preserve(nv, n): # change the dtype if needed dtype, _ = maybe_promote(n.dtype) - if is_extension_array_dtype(v.dtype) and is_object_dtype(dtype): - v = v._internal_get_values(dtype) - else: - v = v.astype(dtype) + v = v.astype(dtype) return _putmask_preserve(v, n) From ed8df2d88d6caac3d83f8c803af8b895398df0c6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Mar 2020 03:18:57 -0800 Subject: [PATCH 261/388] REF: de-nest Series.__setitem__ (#32078) --- pandas/core/series.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index cfdd5635b60a9..568e99622dd29 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -970,14 +970,15 @@ def __setitem__(self, key, value): key = com.apply_if_callable(key, self) cacher_needs_updating = self._check_is_chained_assignment_possible() + if key is Ellipsis: + key = slice(None) + try: self._set_with_engine(key, value) except (KeyError, ValueError): values = self._values if is_integer(key) and not self.index.inferred_type == "integer": values[key] = value - elif key is Ellipsis: - self[:] = value else: self.loc[key] = value @@ -995,9 +996,10 @@ def __setitem__(self, key, value): self._where(~key, value, inplace=True) return except InvalidIndexError: - pass + self._set_values(key.astype(np.bool_), value) - self._set_with(key, value) + else: + self._set_with(key, value) if cacher_needs_updating: self._maybe_update_cacher() @@ -1038,13 +1040,13 @@ def _set_with(self, key, value): else: key_type = lib.infer_dtype(key, skipna=False) + # Note: key_type == "boolean" should not occur because that + # should be caught by the is_bool_indexer check in __setitem__ if key_type == "integer": if self.index.inferred_type == "integer": self._set_labels(key, value) else: return self._set_values(key, value) - elif key_type == "boolean": - self._set_values(key.astype(np.bool_), value) else: self._set_labels(key, value) From edae9d1743a2d15a99712c459912a45a0335747c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Mar 2020 03:26:54 -0800 Subject: [PATCH 262/388] Implement __array__ on ExtensionIndex (#32255) --- 
doc/source/whatsnew/v1.1.0.rst | 2 ++ pandas/core/arrays/period.py | 7 +++++- pandas/core/indexes/category.py | 4 --- pandas/core/indexes/datetimes.py | 3 --- pandas/core/indexes/extension.py | 3 +++ pandas/core/indexes/period.py | 7 ------ pandas/tests/arrays/test_datetimelike.py | 6 ++--- pandas/tests/indexes/period/test_period.py | 29 ++++++++++++++++++++++ 8 files changed, 43 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 44deab25db695..ff9655ab7f177 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -91,6 +91,8 @@ Backwards incompatible API changes now raise a ``TypeError`` if a not-accepted keyword argument is passed into it. Previously a ``UnsupportedFunctionCall`` was raised (``AssertionError`` if ``min_count`` passed into :meth:`~DataFrameGroupby.median``) (:issue:`31485`) - :meth:`DataFrame.at` and :meth:`Series.at` will raise a ``TypeError`` instead of a ``ValueError`` if an incompatible key is passed, and ``KeyError`` if a missing key is passed, matching the behavior of ``.loc[]`` (:issue:`31722`) +- Passing an integer dtype other than ``int64`` to ``np.array(period_index, dtype=...)`` will now raise ``TypeError`` instead of incorrectly using ``int64`` (:issue:`32255`) +- .. _whatsnew_110.api_breaking.indexing_raises_key_errors: diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 8141e2c78a7e2..5eeee644b3854 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -282,7 +282,12 @@ def freq(self): return self.dtype.freq def __array__(self, dtype=None) -> np.ndarray: - # overriding DatetimelikeArray + if dtype == "i8": + return self.asi8 + elif dtype == bool: + return ~self._isnan + + # This will raise TypeErorr for non-object dtypes return np.array(list(self), dtype=object) def __arrow_array__(self, type=None): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index d43ae8eb54818..5997843f7ac6d 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -364,10 +364,6 @@ def __contains__(self, key: Any) -> bool: hash(key) return contains(self, key, container=self._engine) - def __array__(self, dtype=None) -> np.ndarray: - """ the array interface, return my values """ - return np.array(self._data, dtype=dtype) - @Appender(Index.astype.__doc__) def astype(self, dtype, copy=True): if is_interval_dtype(dtype): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index c9fefd46e55c7..2a1153f07f5b9 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -267,9 +267,6 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): # -------------------------------------------------------------------- - def __array__(self, dtype=None) -> np.ndarray: - return np.asarray(self._data, dtype=dtype) - @cache_readonly def _is_dates_only(self) -> bool: """ diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index daccb35864e98..7b11df15f69fb 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -224,6 +224,9 @@ def __iter__(self): # --------------------------------------------------------------------- + def __array__(self, dtype=None) -> np.ndarray: + return np.asarray(self._data, dtype=dtype) + @property def _ndarray_values(self) -> np.ndarray: return self._data._ndarray_values diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 
ebf69c49c029a..6a7595a6686bb 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -19,7 +19,6 @@ is_dtype_equal, is_float, is_integer, - is_integer_dtype, is_object_dtype, is_scalar, pandas_dtype, @@ -338,12 +337,6 @@ def _int64index(self) -> Int64Index: # ------------------------------------------------------------------------ # Index Methods - def __array__(self, dtype=None) -> np.ndarray: - if is_integer_dtype(dtype): - return self.asi8 - else: - return self.astype(object).values - def __array_wrap__(self, result, context=None): """ Gets called after a ufunc. Needs additional handling as diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 17818b6ce689f..f99ee542d543c 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -687,10 +687,10 @@ def test_array_interface(self, period_index): result = np.asarray(arr, dtype=object) tm.assert_numpy_array_equal(result, expected) - # to other dtypes - with pytest.raises(TypeError): - np.asarray(arr, dtype="int64") + result = np.asarray(arr, dtype="int64") + tm.assert_numpy_array_equal(result, arr.asi8) + # to other dtypes with pytest.raises(TypeError): np.asarray(arr, dtype="float64") diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index ab3e967f12360..b4c223be0f6a5 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -681,3 +681,32 @@ def test_is_monotonic_with_nat(): assert not obj.is_monotonic_increasing assert not obj.is_monotonic_decreasing assert obj.is_unique + + +@pytest.mark.parametrize("array", [True, False]) +def test_dunder_array(array): + obj = PeriodIndex(["2000-01-01", "2001-01-01"], freq="D") + if array: + obj = obj._data + + expected = np.array([obj[0], obj[1]], dtype=object) + result = np.array(obj) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(obj) + tm.assert_numpy_array_equal(result, expected) + + expected = obj.asi8 + for dtype in ["i8", "int64", np.int64]: + result = np.array(obj, dtype=dtype) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(obj, dtype=dtype) + tm.assert_numpy_array_equal(result, expected) + + for dtype in ["float64", "int32", "uint64"]: + msg = "argument must be" + with pytest.raises(TypeError, match=msg): + np.array(obj, dtype=dtype) + with pytest.raises(TypeError, match=msg): + np.array(obj, dtype=getattr(np, dtype)) From 8f51c998e84feeac6cb760a9f12baf6948cd5922 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Sat, 7 Mar 2020 14:01:23 +0200 Subject: [PATCH 263/388] TST: Removed import of itertools (#32364) --- pandas/tests/groupby/test_function.py | 33 ++++++++++++--------------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index c402ca194648f..83080aa98648f 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1,7 +1,6 @@ import builtins import datetime as dt from io import StringIO -from itertools import product from string import ascii_lowercase import numpy as np @@ -1296,36 +1295,32 @@ def __eq__(self, other): # -------------------------------- -def test_size(df): - grouped = df.groupby(["A", "B"]) +@pytest.mark.parametrize("by", ["A", "B", ["A", "B"]]) +def test_size(df, by): + grouped = df.groupby(by=by) result = 
grouped.size() for key, group in grouped: assert result[key] == len(group) - grouped = df.groupby("A") - result = grouped.size() - for key, group in grouped: - assert result[key] == len(group) - grouped = df.groupby("B") - result = grouped.size() - for key, group in grouped: - assert result[key] == len(group) +@pytest.mark.parametrize("by", ["A", "B", ["A", "B"]]) +@pytest.mark.parametrize("sort", [True, False]) +def test_size_sort(df, sort, by): + df = DataFrame(np.random.choice(20, (1000, 3)), columns=list("ABC")) + left = df.groupby(by=by, sort=sort).size() + right = df.groupby(by=by, sort=sort)["C"].apply(lambda a: a.shape[0]) + tm.assert_series_equal(left, right, check_names=False) - df = DataFrame(np.random.choice(20, (1000, 3)), columns=list("abc")) - for sort, key in product((False, True), ("a", "b", ["a", "b"])): - left = df.groupby(key, sort=sort).size() - right = df.groupby(key, sort=sort)["c"].apply(lambda a: a.shape[0]) - tm.assert_series_equal(left, right, check_names=False) - # GH11699 +def test_size_series_dataframe(): + # https://github.com/pandas-dev/pandas/issues/11699 df = DataFrame(columns=["A", "B"]) out = Series(dtype="int64", index=Index([], name="A")) tm.assert_series_equal(df.groupby("A").size(), out) def test_size_groupby_all_null(): - # GH23050 + # https://github.com/pandas-dev/pandas/issues/23050 # Assert no 'Value Error : Length of passed values is 2, index implies 0' df = DataFrame({"A": [None, None]}) # all-null groups result = df.groupby("A").size() @@ -1335,6 +1330,8 @@ def test_size_groupby_all_null(): # quantile # -------------------------------- + + @pytest.mark.parametrize( "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] ) From 3d08aa5c95e082db8d18345a9b49de6cb4b6f132 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Sat, 7 Mar 2020 19:35:20 +0100 Subject: [PATCH 264/388] TST: refactored test_factorize (#32311) --- pandas/tests/base/test_ops.py | 73 ++++++++--------------------------- 1 file changed, 17 insertions(+), 56 deletions(-) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index f1cc98a1b773d..c368db13b9017 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -547,66 +547,27 @@ def test_value_counts_datetime64(self, index_or_series): result2 = td2.value_counts() tm.assert_series_equal(result2, expected_s) - def test_factorize(self): - for orig in self.objs: - o = orig.copy() - - if isinstance(o, Index) and o.is_boolean(): - exp_arr = np.array([0, 1] + [0] * 8, dtype=np.intp) - exp_uniques = o - exp_uniques = Index([False, True]) - else: - exp_arr = np.array(range(len(o)), dtype=np.intp) - exp_uniques = o - codes, uniques = o.factorize() - - tm.assert_numpy_array_equal(codes, exp_arr) - if isinstance(o, Series): - tm.assert_index_equal(uniques, Index(orig), check_names=False) - else: - # factorize explicitly resets name - tm.assert_index_equal(uniques, exp_uniques, check_names=False) - - def test_factorize_repeated(self): - for orig in self.objs: - o = orig.copy() + @pytest.mark.parametrize("sort", [True, False]) + def test_factorize(self, index_or_series_obj, sort): + obj = index_or_series_obj + result_codes, result_uniques = obj.factorize(sort=sort) - # don't test boolean - if isinstance(o, Index) and o.is_boolean(): - continue + constructor = pd.Index + if isinstance(obj, pd.MultiIndex): + constructor = pd.MultiIndex.from_tuples + expected_uniques = constructor(obj.unique()) - # sort by value, and create duplicates - if isinstance(o, Series): - o = 
o.sort_values() - n = o.iloc[5:].append(o) - else: - indexer = o.argsort() - o = o.take(indexer) - n = o[5:].append(o) - - exp_arr = np.array( - [5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.intp - ) - codes, uniques = n.factorize(sort=True) - - tm.assert_numpy_array_equal(codes, exp_arr) - if isinstance(o, Series): - tm.assert_index_equal( - uniques, Index(orig).sort_values(), check_names=False - ) - else: - tm.assert_index_equal(uniques, o, check_names=False) + if sort: + expected_uniques = expected_uniques.sort_values() - exp_arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4], np.intp) - codes, uniques = n.factorize(sort=False) - tm.assert_numpy_array_equal(codes, exp_arr) + # construct an integer ndarray so that + # `expected_uniques.take(expected_codes)` is equal to `obj` + expected_uniques_list = list(expected_uniques) + expected_codes = [expected_uniques_list.index(val) for val in obj] + expected_codes = np.asarray(expected_codes, dtype=np.intp) - if isinstance(o, Series): - expected = Index(o.iloc[5:10].append(o.iloc[:5])) - tm.assert_index_equal(uniques, expected, check_names=False) - else: - expected = o[5:10].append(o[:5]) - tm.assert_index_equal(uniques, expected, check_names=False) + tm.assert_numpy_array_equal(result_codes, expected_codes) + tm.assert_index_equal(result_uniques, expected_uniques) def test_duplicated_drop_duplicates_index(self): # GH 4060 From 20474f585f74ba9a59886a462962391753813b91 Mon Sep 17 00:00:00 2001 From: Sandu Ursu Date: Sat, 7 Mar 2020 19:59:41 +0000 Subject: [PATCH 265/388] DOC: DataFrame.ewm docstring clean-up (#32212) --- pandas/core/window/ewm.py | 77 +++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index c6096c24ecbc9..0ec876583dcde 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -29,19 +29,25 @@ class EWM(_Rolling): r""" - Provide exponential weighted functions. + Provide exponential weighted (EW) functions. + + Available EW functions: ``mean()``, ``var()``, ``std()``, ``corr()``, ``cov()``. + + Exactly one parameter: ``com``, ``span``, ``halflife``, or ``alpha`` must be + provided. Parameters ---------- com : float, optional Specify decay in terms of center of mass, - :math:`\alpha = 1 / (1 + com),\text{ for } com \geq 0`. + :math:`\alpha = 1 / (1 + com)`, for :math:`com \geq 0`. span : float, optional Specify decay in terms of span, - :math:`\alpha = 2 / (span + 1),\text{ for } span \geq 1`. + :math:`\alpha = 2 / (span + 1)`, for :math:`span \geq 1`. halflife : float, optional Specify decay in terms of half-life, - :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{for} halflife > 0`. + :math:`\alpha = 1 - \exp\left(-\ln(2) / halflife\right)`, for + :math:`halflife > 0`. alpha : float, optional Specify smoothing factor :math:`\alpha` directly, :math:`0 < \alpha \leq 1`. @@ -50,11 +56,39 @@ class EWM(_Rolling): (otherwise result is NA). adjust : bool, default True Divide by decaying adjustment factor in beginning periods to account - for imbalance in relative weightings - (viewing EWMA as a moving average). + for imbalance in relative weightings (viewing EWMA as a moving average). + + - When ``adjust=True`` (default), the EW function is calculated using weights + :math:`w_i = (1 - \alpha)^i`. For example, the EW moving average of the series + [:math:`x_0, x_1, ..., x_t`] would be: + + .. math:: + y_t = \frac{x_t + (1 - \alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ... 
+ (1 - + \alpha)^t x_0}{1 + (1 - \alpha) + (1 - \alpha)^2 + ... + (1 - \alpha)^t} + + - When ``adjust=False``, the exponentially weighted function is calculated + recursively: + + .. math:: + \begin{split} + y_0 &= x_0\\ + y_t &= (1 - \alpha) y_{t-1} + \alpha x_t, + \end{split} ignore_na : bool, default False - Ignore missing values when calculating weights; - specify True to reproduce pre-0.15.0 behavior. + Ignore missing values when calculating weights; specify ``True`` to reproduce + pre-0.15.0 behavior. + + - When ``ignore_na=False`` (default), weights are based on absolute positions. + For example, the weights of :math:`x_0` and :math:`x_2` used in calculating + the final weighted average of [:math:`x_0`, None, :math:`x_2`] are + :math:`(1-\alpha)^2` and :math:`1` if ``adjust=True``, and + :math:`(1-\alpha)^2` and :math:`\alpha` if ``adjust=False``. + + - When ``ignore_na=True`` (reproducing pre-0.15.0 behavior), weights are based + on relative positions. For example, the weights of :math:`x_0` and :math:`x_2` + used in calculating the final weighted average of + [:math:`x_0`, None, :math:`x_2`] are :math:`1-\alpha` and :math:`1` if + ``adjust=True``, and :math:`1-\alpha` and :math:`\alpha` if ``adjust=False``. axis : {0 or 'index', 1 or 'columns'}, default 0 The axis to use. The value 0 identifies the rows, and 1 identifies the columns. @@ -71,30 +105,9 @@ class EWM(_Rolling): Notes ----- - Exactly one of center of mass, span, half-life, and alpha must be provided. - Allowed values and relationship between the parameters are specified in the - parameter descriptions above; see the link at the end of this section for - a detailed explanation. - - When adjust is True (default), weighted averages are calculated using - weights (1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1. - - When adjust is False, weighted averages are calculated recursively as: - weighted_average[0] = arg[0]; - weighted_average[i] = (1-alpha)*weighted_average[i-1] + alpha*arg[i]. - - When ignore_na is False (default), weights are based on absolute positions. - For example, the weights of x and y used in calculating the final weighted - average of [x, None, y] are (1-alpha)**2 and 1 (if adjust is True), and - (1-alpha)**2 and alpha (if adjust is False). - - When ignore_na is True (reproducing pre-0.15.0 behavior), weights are based - on relative positions. For example, the weights of x and y used in - calculating the final weighted average of [x, None, y] are 1-alpha and 1 - (if adjust is True), and 1-alpha and alpha (if adjust is False). - - More details can be found at - https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows + + More details can be found at: + :ref:`Exponentially weighted windows `. 
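The two weighting schemes spelled out above are easy to cross-check numerically. Below is a minimal sketch, separate from the patch itself and assuming only the public ``Series.ewm`` API; it reproduces the ``adjust=False`` recursion and the ``adjust=True`` weighted average by hand:

    import numpy as np
    import pandas as pd

    s = pd.Series([1.0, 2.0, 3.0, 4.0])
    alpha = 0.5

    # adjust=False: y_0 = x_0, then y_t = (1 - alpha) * y_{t-1} + alpha * x_t
    y = np.empty(len(s))
    y[0] = s.iloc[0]
    for t in range(1, len(s)):
        y[t] = (1 - alpha) * y[t - 1] + alpha * s.iloc[t]
    assert np.allclose(s.ewm(alpha=alpha, adjust=False).mean(), y)

    # adjust=True: divide by the decaying sum of the weights (1 - alpha)**i
    weights = (1 - alpha) ** np.arange(len(s))[::-1]
    expected_last = (weights * s).sum() / weights.sum()
    assert np.isclose(s.ewm(alpha=alpha, adjust=True).mean().iloc[-1], expected_last)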
Examples -------- From 2a2258d64400b0f535502d903c9ab05b7d696af2 Mon Sep 17 00:00:00 2001 From: tonywu1999 Date: Sat, 7 Mar 2020 15:06:25 -0500 Subject: [PATCH 266/388] CI: Adding script to validate consistent and correct capitalization among headings in documentation (#26941) (#31114) --- ci/code_checks.sh | 4 + doc/source/development/contributing.rst | 14 +- scripts/validate_rst_title_capitalization.py | 206 +++++++++++++++++++ 3 files changed, 217 insertions(+), 7 deletions(-) create mode 100755 scripts/validate_rst_title_capitalization.py diff --git a/ci/code_checks.sh b/ci/code_checks.sh index e2dc543360a62..18f701c140e50 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -320,6 +320,10 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA02,SA03,SA05 RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Validate correct capitalization among titles in documentation' ; echo $MSG + $BASE_DIR/scripts/validate_rst_title_capitalization.py $BASE_DIR/doc/source/development/contributing.rst + RET=$(($RET + $?)) ; echo $MSG "DONE" + fi ### DEPENDENCIES ### diff --git a/doc/source/development/contributing.rst b/doc/source/development/contributing.rst index f904781178656..db9e23035b977 100644 --- a/doc/source/development/contributing.rst +++ b/doc/source/development/contributing.rst @@ -146,7 +146,7 @@ requires a C compiler and Python environment. If you're making documentation changes, you can skip to :ref:`contributing.documentation` but you won't be able to build the documentation locally before pushing your changes. -Using a Docker Container +Using a Docker container ~~~~~~~~~~~~~~~~~~~~~~~~ Instead of manually setting up a development environment, you can use Docker to @@ -754,7 +754,7 @@ You can then verify the changes look ok, then git :ref:`commit `_ and you should use these where applicable. This module is private for now but ultimately this should be exposed to third party libraries who want to implement type checking against pandas. @@ -919,7 +919,7 @@ For example, quite a few functions in *pandas* accept a ``dtype`` argument. This This module will ultimately house types for repeatedly used concepts like "path-like", "array-like", "numeric", etc... and can also hold aliases for commonly appearing parameters like `axis`. Development of this module is active so be sure to refer to the source for the most up to date list of available types. -Validating Type Hints +Validating type hints ~~~~~~~~~~~~~~~~~~~~~ *pandas* uses `mypy `_ to statically analyze the code base and type hints. After making any change you can ensure your type hints are correct by running @@ -1539,7 +1539,7 @@ The branch will still exist on GitHub, so to delete it there do:: .. _Gitter: https://gitter.im/pydata/pandas -Tips for a successful Pull Request +Tips for a successful pull request ================================== If you have made it to the `Review your code`_ phase, one of the core contributors may diff --git a/scripts/validate_rst_title_capitalization.py b/scripts/validate_rst_title_capitalization.py new file mode 100755 index 0000000000000..17752134e5049 --- /dev/null +++ b/scripts/validate_rst_title_capitalization.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python +""" +Validate that the titles in the rst files follow the proper capitalization convention. + +Print the titles that do not follow the convention. 
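As a concrete illustration of the convention this check enforces, the helper defined later in this file maps an over-capitalized heading to its expected sentence-case form, keeping words from the exception list intact. The heading below is the one renamed in ``contributing.rst`` earlier in this same patch (a hypothetical interactive session; it assumes the ``scripts`` directory is on ``sys.path``):

    >>> from validate_rst_title_capitalization import correct_title_capitalization
    >>> correct_title_capitalization("Using a Docker Container")
    'Using a Docker container'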
+ +Usage:: +./scripts/validate_rst_title_capitalization.py doc/source/development/contributing.rst +./scripts/validate_rst_title_capitalization.py doc/source/ + +""" +import argparse +import sys +import re +import os +from typing import Tuple, Generator, List +import glob + + +CAPITALIZATION_EXCEPTIONS = { + "pandas", + "Python", + "IPython", + "PyTables", + "Excel", + "JSON", + "HTML", + "SAS", + "SQL", + "BigQuery", + "STATA", + "Interval", + "PEP8", + "Period", + "Series", + "Index", + "DataFrame", + "C", + "Git", + "GitHub", + "NumPy", + "Apache", + "Arrow", + "Parquet", + "MultiIndex", + "NumFOCUS", + "sklearn", + "Docker", +} + +CAP_EXCEPTIONS_DICT = {word.lower(): word for word in CAPITALIZATION_EXCEPTIONS} + +err_msg = "Heading capitalization formatted incorrectly. Please correctly capitalize" + +symbols = ("*", "=", "-", "^", "~", "#", '"') + + +def correct_title_capitalization(title: str) -> str: + """ + Algorithm to create the correct capitalization for a given title. + + Parameters + ---------- + title : str + Heading string to correct. + + Returns + ------- + str + Correctly capitalized heading. + """ + + # Strip all non-word characters from the beginning of the title to the + # first word character. + correct_title: str = re.sub(r"^\W*", "", title).capitalize() + + # Remove a URL from the title. We do this because words in a URL must + # stay lowercase, even if they are a capitalization exception. + removed_https_title = re.sub(r"", "", correct_title) + + # Split a title into a list using non-word character delimiters. + word_list = re.split(r"\W", removed_https_title) + + for word in word_list: + if word.lower() in CAP_EXCEPTIONS_DICT: + correct_title = re.sub( + rf"\b{word}\b", CAP_EXCEPTIONS_DICT[word.lower()], correct_title + ) + + return correct_title + + +def find_titles(rst_file: str) -> Generator[Tuple[str, int], None, None]: + """ + Algorithm to identify particular text that should be considered headings in an + RST file. + + See for details + on what constitutes a string as a heading in RST. + + Parameters + ---------- + rst_file : str + RST file to scan through for headings. + + Yields + ------- + title : str + A heading found in the rst file. + + line_number : int + The corresponding line number of the heading. + """ + + with open(rst_file, "r") as fd: + previous_line = "" + for i, line in enumerate(fd): + line = line[:-1] + line_chars = set(line) + if ( + len(line_chars) == 1 + and line_chars.pop() in symbols + and len(line) == len(previous_line) + ): + yield re.sub(r"[`\*_]", "", previous_line), i + previous_line = line + + +def find_rst_files(source_paths: List[str]) -> Generator[str, None, None]: + """ + Given the command line arguments of directory paths, this method + yields the strings of the .rst file directories that these paths contain. + + Parameters + ---------- + source_paths : str + List of directories to validate, provided through command line arguments. + + Yields + ------- + str + Directory address of a .rst files found in command line argument directories. + """ + + for directory_address in source_paths: + if not os.path.exists(directory_address): + raise ValueError( + "Please enter a valid path, pointing to a valid file/directory." 
+ ) + elif directory_address.endswith(".rst"): + yield directory_address + else: + for filename in glob.glob( + pathname=f"{directory_address}/**/*.rst", recursive=True + ): + yield filename + + +def main(source_paths: List[str], output_format: str) -> bool: + """ + The main method to print all headings with incorrect capitalization. + + Parameters + ---------- + source_paths : str + List of directories to validate, provided through command line arguments. + output_format : str + Output format of the script. + + Returns + ------- + int + Number of incorrect headings found overall. + """ + + number_of_errors: int = 0 + + for filename in find_rst_files(source_paths): + for title, line_number in find_titles(filename): + if title != correct_title_capitalization(title): + print( + f"""{filename}:{line_number}:{err_msg} "{title}" to "{ + correct_title_capitalization(title)}" """ + ) + number_of_errors += 1 + + return number_of_errors + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Validate heading capitalization") + + parser.add_argument( + "paths", nargs="+", default=".", help="Source paths of file/directory to check." + ) + + parser.add_argument( + "--format", + "-f", + default="{source_path}:{line_number}:{msg}:{heading}:{correct_heading}", + help="Output format of incorrectly capitalized titles", + ) + + args = parser.parse_args() + + sys.exit(main(args.paths, args.format)) From 6852012805bd4cc878a29bbbe7c38b7ea0b980fd Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Sat, 7 Mar 2020 22:10:20 +0200 Subject: [PATCH 267/388] DOC: Fix examples in documentation (#31472) --- ci/code_checks.sh | 16 ++- pandas/core/base.py | 32 +++--- pandas/core/construction.py | 5 +- pandas/core/generic.py | 195 +++++++++++++++++++++++++++--------- 4 files changed, 180 insertions(+), 68 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 18f701c140e50..e6a761b91f353 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -267,11 +267,6 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then -k"-nonzero -reindex -searchsorted -to_dict" RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Doctests generic.py' ; echo $MSG - pytest -q --doctest-modules pandas/core/generic.py \ - -k"-_set_axis_name -_xs -describe -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard" - RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Doctests groupby.py' ; echo $MSG pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe" RET=$(($RET + $?)) ; echo $MSG "DONE" @@ -311,6 +306,17 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then pytest -q --doctest-modules pandas/core/arrays/boolean.py RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Doctests base.py' ; echo $MSG + pytest -q --doctest-modules pandas/core/base.py + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Doctests construction.py' ; echo $MSG + pytest -q --doctest-modules pandas/core/construction.py + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Doctests generic.py' ; echo $MSG + pytest -q --doctest-modules pandas/core/generic.py + RET=$(($RET + $?)) ; echo $MSG "DONE" fi ### DOCSTRINGS ### diff --git a/pandas/core/base.py b/pandas/core/base.py index 508582540e169..478b83f538b7d 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1473,41 +1473,49 @@ def factorize(self, sort=False, na_sentinel=-1): Examples -------- - >>> x = pd.Series([1, 2, 3]) - >>> x + >>> ser = pd.Series([1, 2, 3]) + 
>>> ser 0 1 1 2 2 3 dtype: int64 - >>> x.searchsorted(4) + >>> ser.searchsorted(4) 3 - >>> x.searchsorted([0, 4]) + >>> ser.searchsorted([0, 4]) array([0, 3]) - >>> x.searchsorted([1, 3], side='left') + >>> ser.searchsorted([1, 3], side='left') array([0, 2]) - >>> x.searchsorted([1, 3], side='right') + >>> ser.searchsorted([1, 3], side='right') array([1, 3]) - >>> x = pd.Categorical(['apple', 'bread', 'bread', - 'cheese', 'milk'], ordered=True) + >>> ser = pd.Categorical( + ... ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True + ... ) + >>> ser [apple, bread, bread, cheese, milk] Categories (4, object): [apple < bread < cheese < milk] - >>> x.searchsorted('bread') + >>> ser.searchsorted('bread') 1 - >>> x.searchsorted(['bread'], side='right') + >>> ser.searchsorted(['bread'], side='right') array([3]) If the values are not monotonically sorted, wrong locations may be returned: - >>> x = pd.Series([2, 1, 3]) - >>> x.searchsorted(1) + >>> ser = pd.Series([2, 1, 3]) + >>> ser + 0 2 + 1 1 + 2 3 + dtype: int64 + + >>> ser.searchsorted(1) # doctest: +SKIP 0 # wrong result, correct would be 1 """ diff --git a/pandas/core/construction.py b/pandas/core/construction.py index f947a1fda49f1..e2d8fba8d4148 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -4,6 +4,7 @@ These should not depend on core.internals. """ + from typing import TYPE_CHECKING, Any, Optional, Sequence, Union, cast import numpy as np @@ -200,12 +201,12 @@ def array( >>> pd.array([1, 2, np.nan]) - [1, 2, NaN] + [1, 2, ] Length: 3, dtype: Int64 >>> pd.array(["a", None, "c"]) - ['a', nan, 'c'] + ['a', , 'c'] Length: 3, dtype: string >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")]) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f7196073162df..e6c5ac9dbf733 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1212,7 +1212,7 @@ def _set_axis_name(self, name, axis=0, inplace=False): >>> df.index = pd.MultiIndex.from_product( ... [["mammal"], ['dog', 'cat', 'monkey']]) >>> df._set_axis_name(["type", "name"]) - legs + num_legs type name mammal dog 4 cat 4 @@ -2184,45 +2184,141 @@ def to_json( Examples -------- - >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']], - ... index=['row 1', 'row 2'], - ... columns=['col 1', 'col 2']) - >>> df.to_json(orient='split') - '{"columns":["col 1","col 2"], - "index":["row 1","row 2"], - "data":[["a","b"],["c","d"]]}' + >>> import json + >>> df = pd.DataFrame( + ... [["a", "b"], ["c", "d"]], + ... index=["row 1", "row 2"], + ... columns=["col 1", "col 2"], + ... ) + + >>> result = df.to_json(orient="split") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + { + "columns": [ + "col 1", + "col 2" + ], + "index": [ + "row 1", + "row 2" + ], + "data": [ + [ + "a", + "b" + ], + [ + "c", + "d" + ] + ] + } Encoding/decoding a Dataframe using ``'records'`` formatted JSON. Note that index labels are not preserved with this encoding. 
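A minimal round-trip sketch, separate from the patch and using the existing ``pandas.read_json`` counterpart, makes that caveat concrete:

    import pandas as pd

    df = pd.DataFrame(
        [["a", "b"], ["c", "d"]],
        index=["row 1", "row 2"],
        columns=["col 1", "col 2"],
    )
    payload = df.to_json(orient="records")
    roundtrip = pd.read_json(payload, orient="records")

    # The labels "row 1"/"row 2" are gone after the round trip;
    # the parsed frame gets a fresh RangeIndex instead.
    print(roundtrip.index)  # RangeIndex(start=0, stop=2, step=1)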
- >>> df.to_json(orient='records') - '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]' + >>> result = df.to_json(orient="records") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + [ + { + "col 1": "a", + "col 2": "b" + }, + { + "col 1": "c", + "col 2": "d" + } + ] Encoding/decoding a Dataframe using ``'index'`` formatted JSON: - >>> df.to_json(orient='index') - '{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}' + >>> result = df.to_json(orient="index") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + { + "row 1": { + "col 1": "a", + "col 2": "b" + }, + "row 2": { + "col 1": "c", + "col 2": "d" + } + } Encoding/decoding a Dataframe using ``'columns'`` formatted JSON: - >>> df.to_json(orient='columns') - '{"col 1":{"row 1":"a","row 2":"c"},"col 2":{"row 1":"b","row 2":"d"}}' + >>> result = df.to_json(orient="columns") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + { + "col 1": { + "row 1": "a", + "row 2": "c" + }, + "col 2": { + "row 1": "b", + "row 2": "d" + } + } Encoding/decoding a Dataframe using ``'values'`` formatted JSON: - >>> df.to_json(orient='values') - '[["a","b"],["c","d"]]' - - Encoding with Table Schema + >>> result = df.to_json(orient="values") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + [ + [ + "a", + "b" + ], + [ + "c", + "d" + ] + ] - >>> df.to_json(orient='table') - '{"schema": {"fields": [{"name": "index", "type": "string"}, - {"name": "col 1", "type": "string"}, - {"name": "col 2", "type": "string"}], - "primaryKey": "index", - "pandas_version": "0.20.0"}, - "data": [{"index": "row 1", "col 1": "a", "col 2": "b"}, - {"index": "row 2", "col 1": "c", "col 2": "d"}]}' + Encoding with Table Schema: + + >>> result = df.to_json(orient="table") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + { + "schema": { + "fields": [ + { + "name": "index", + "type": "string" + }, + { + "name": "col 1", + "type": "string" + }, + { + "name": "col 2", + "type": "string" + } + ], + "primaryKey": [ + "index" + ], + "pandas_version": "0.20.0" + }, + "data": [ + { + "index": "row 1", + "col 1": "a", + "col 2": "b" + }, + { + "index": "row 2", + "col 1": "c", + "col 2": "d" + } + ] + } """ from pandas.io import json @@ -2649,7 +2745,8 @@ def to_clipboard( Copy the contents of a DataFrame to the clipboard. >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C']) - >>> df.to_clipboard(sep=',') + + >>> df.to_clipboard(sep=',') # doctest: +SKIP ... # Wrote the following to the system clipboard: ... # ,A,B,C ... # 0,1,2,3 @@ -2658,7 +2755,7 @@ def to_clipboard( We can omit the index by passing the keyword `index` and setting it to false. - >>> df.to_clipboard(sep=',', index=False) + >>> df.to_clipboard(sep=',', index=False) # doctest: +SKIP ... # Wrote the following to the system clipboard: ... # A,B,C ... # 1,2,3 @@ -4883,18 +4980,17 @@ def sample( Notes ----- - Use ``.pipe`` when chaining together functions that expect Series, DataFrames or GroupBy objects. Instead of writing - >>> f(g(h(df), arg1=a), arg2=b, arg3=c) + >>> func(g(h(df), arg1=a), arg2=b, arg3=c) # doctest: +SKIP You can write >>> (df.pipe(h) ... .pipe(g, arg1=a) - ... .pipe(f, arg2=b, arg3=c) - ... ) + ... .pipe(func, arg2=b, arg3=c) + ... 
) # doctest: +SKIP If you have a function that takes the data as (say) the second argument, pass a tuple indicating which keyword expects the @@ -4902,8 +4998,8 @@ def sample( >>> (df.pipe(h) ... .pipe(g, arg1=a) - ... .pipe((f, 'arg2'), arg1=a, arg3=c) - ... ) + ... .pipe((func, 'arg2'), arg1=a, arg3=c) + ... ) # doctest: +SKIP """ @Appender(_shared_docs["pipe"] % _shared_doc_kwargs) @@ -5252,7 +5348,7 @@ def values(self) -> np.ndarray: dtype: object >>> df.values array([[ 3, 94, 31], - [ 29, 170, 115]], dtype=int64) + [ 29, 170, 115]]) A DataFrame with mixed type columns(e.g., str/object, int64, float32) results in an ndarray of the broadest type that accommodates these @@ -9521,12 +9617,13 @@ def describe( ... np.datetime64("2010-01-01") ... ]) >>> s.describe() - count 3 - unique 2 - top 2010-01-01 00:00:00 - freq 2 - first 2000-01-01 00:00:00 - last 2010-01-01 00:00:00 + count 3 + mean 2006-09-01 08:00:00 + min 2000-01-01 00:00:00 + 25% 2004-12-31 12:00:00 + 50% 2010-01-01 00:00:00 + 75% 2010-01-01 00:00:00 + max 2010-01-01 00:00:00 dtype: object Describing a ``DataFrame``. By default only numeric fields @@ -9549,11 +9646,11 @@ def describe( Describing all columns of a ``DataFrame`` regardless of data type. - >>> df.describe(include='all') - categorical numeric object + >>> df.describe(include='all') # doctest: +SKIP + categorical numeric object count 3 3.0 3 unique 3 NaN 3 - top f NaN c + top f NaN a freq 1 NaN 1 mean NaN 2.0 NaN std NaN 1.0 NaN @@ -9592,11 +9689,11 @@ def describe( Including only string columns in a ``DataFrame`` description. - >>> df.describe(include=[np.object]) + >>> df.describe(include=[np.object]) # doctest: +SKIP object count 3 unique 3 - top c + top a freq 1 Including only categorical columns from a ``DataFrame`` description. @@ -9610,16 +9707,16 @@ def describe( Excluding numeric columns from a ``DataFrame`` description. - >>> df.describe(exclude=[np.number]) + >>> df.describe(exclude=[np.number]) # doctest: +SKIP categorical object count 3 3 unique 3 3 - top f c + top f a freq 1 1 Excluding object columns from a ``DataFrame`` description. - >>> df.describe(exclude=[np.object]) + >>> df.describe(exclude=[np.object]) # doctest: +SKIP categorical numeric count 3 3.0 unique 3 NaN From 684a2911af691efa41b3df8ad3e9f95f6dc6ad6f Mon Sep 17 00:00:00 2001 From: William <31226068+Hori75@users.noreply.github.com> Date: Sun, 8 Mar 2020 04:01:13 +0700 Subject: [PATCH 268/388] DOC: Update the pandas.DatetimeIndex docstring (#32360) --- pandas/core/indexes/datetimes.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 2a1153f07f5b9..185ad8e4c365a 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -78,21 +78,26 @@ def _new_DatetimeIndex(cls, d): ) class DatetimeIndex(DatetimeTimedeltaMixin): """ - Immutable ndarray of datetime64 data, represented internally as int64, and - which can be boxed to Timestamp objects that are subclasses of datetime and - carry metadata such as frequency information. + Immutable ndarray-like of datetime64 data. + + Represented internally as int64, and which can be boxed to Timestamp objects + that are subclasses of datetime and carry metadata. Parameters ---------- data : array-like (1-dimensional), optional Optional datetime-like data to construct index with. - copy : bool - Make a copy of input ndarray. 
freq : str or pandas offset object, optional One of pandas date offset strings or corresponding objects. The string 'infer' can be passed in order to set the frequency of the index as the inferred frequency upon creation. - tz : pytz.timezone or dateutil.tz.tzfile + tz : pytz.timezone or dateutil.tz.tzfile or datetime.tzinfo or str + Set the Timezone of the data. + normalize : bool, default False + Normalize start/end dates to midnight before generating date range. + closed : {'left', 'right'}, optional + Set whether to include `start` and `end` that are on the + boundary. The default includes boundary points on either end. ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' When clocks moved backward due to DST, ambiguous times may arise. For example in Central European Time (UTC+01), when going from 03:00 @@ -107,12 +112,16 @@ class DatetimeIndex(DatetimeTimedeltaMixin): times) - 'NaT' will return NaT where there are ambiguous times - 'raise' will raise an AmbiguousTimeError if there are ambiguous times. - name : object - Name to be stored in the index. dayfirst : bool, default False If True, parse dates in `data` with the day first order. yearfirst : bool, default False If True parse dates in `data` with the year first order. + dtype : numpy.dtype or DatetimeTZDtype or str, default None + Note that the only NumPy dtype allowed is ‘datetime64[ns]’. + copy : bool, default False + Make a copy of input ndarray. + name : label, default None + Name to be stored in the index. Attributes ---------- From 00f182500e67918e67b58d9dc757bb7a4302ab6a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 8 Mar 2020 08:39:06 -0700 Subject: [PATCH 269/388] BUG: PeriodIndex.asof_locs (#32310) --- pandas/core/indexes/period.py | 15 ++++++------ pandas/tests/frame/methods/test_asof.py | 22 +++++++++++++++++- pandas/tests/indexes/period/test_indexing.py | 24 ++++++++++++++++++++ pandas/tests/series/methods/test_asof.py | 7 ++++++ 4 files changed, 59 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 6a7595a6686bb..9ab80c0b6145c 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -370,27 +370,26 @@ def __array_wrap__(self, result, context=None): # cannot pass _simple_new as it is return type(self)(result, freq=self.freq, name=self.name) - def asof_locs(self, where, mask): + def asof_locs(self, where, mask: np.ndarray) -> np.ndarray: """ where : array of timestamps mask : array of booleans where data is not NA - """ where_idx = where if isinstance(where_idx, DatetimeIndex): where_idx = PeriodIndex(where_idx.values, freq=self.freq) + elif not isinstance(where_idx, PeriodIndex): + raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex") + elif where_idx.freq != self.freq: + raise raise_on_incompatible(self, where_idx) - locs = self._ndarray_values[mask].searchsorted( - where_idx._ndarray_values, side="right" - ) + locs = self.asi8[mask].searchsorted(where_idx.asi8, side="right") locs = np.where(locs > 0, locs - 1, 0) result = np.arange(len(self))[mask].take(locs) first = mask.argmax() - result[ - (locs == 0) & (where_idx._ndarray_values < self._ndarray_values[first]) - ] = -1 + result[(locs == 0) & (where_idx.asi8 < self.asi8[first])] = -1 return result diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py index e2b417972638e..c68a547a3755a 100644 --- a/pandas/tests/frame/methods/test_asof.py +++ b/pandas/tests/frame/methods/test_asof.py @@ -1,7 +1,17 @@ 
import numpy as np import pytest -from pandas import DataFrame, Period, Series, Timestamp, date_range, to_datetime +from pandas._libs.tslibs import IncompatibleFrequency + +from pandas import ( + DataFrame, + Period, + Series, + Timestamp, + date_range, + period_range, + to_datetime, +) import pandas._testing as tm @@ -156,3 +166,13 @@ def test_is_copy(self, date_range_frame): with tm.assert_produces_warning(None): result["C"] = 1 + + def test_asof_periodindex_mismatched_freq(self): + N = 50 + rng = period_range("1/1/1990", periods=N, freq="H") + df = DataFrame(np.random.randn(N), index=rng) + + # Mismatched freq + msg = "Input has different freq" + with pytest.raises(IncompatibleFrequency, match=msg): + df.asof(rng.asfreq("D")) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 077fa2a0b1c56..a4c6764d065c9 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -795,3 +795,27 @@ def test_period_index_indexer(self): tm.assert_frame_equal(df, df.loc[list(idx)]) tm.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]]) tm.assert_frame_equal(df, df.loc[list(idx)]) + + +class TestAsOfLocs: + def test_asof_locs_mismatched_type(self): + dti = pd.date_range("2016-01-01", periods=3) + pi = dti.to_period("D") + pi2 = dti.to_period("H") + + mask = np.array([0, 1, 0], dtype=bool) + + msg = "must be DatetimeIndex or PeriodIndex" + with pytest.raises(TypeError, match=msg): + pi.asof_locs(pd.Int64Index(pi.asi8), mask) + + with pytest.raises(TypeError, match=msg): + pi.asof_locs(pd.Float64Index(pi.asi8), mask) + + with pytest.raises(TypeError, match=msg): + # TimedeltaIndex + pi.asof_locs(dti - dti, mask) + + msg = "Input has different freq=H" + with pytest.raises(libperiod.IncompatibleFrequency, match=msg): + pi.asof_locs(pi2, mask) diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/methods/test_asof.py index b121efd202744..d6462e13b1ce7 100644 --- a/pandas/tests/series/methods/test_asof.py +++ b/pandas/tests/series/methods/test_asof.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._libs.tslibs import IncompatibleFrequency + from pandas import Series, Timestamp, date_range, isna, notna, offsets import pandas._testing as tm @@ -132,6 +134,11 @@ def test_periodindex(self): d = ts.index[0].to_timestamp() - offsets.BDay() assert isna(ts.asof(d)) + # Mismatched freq + msg = "Input has different freq" + with pytest.raises(IncompatibleFrequency, match=msg): + ts.asof(rng.asfreq("D")) + def test_errors(self): s = Series( From aa27b9a69fda2a38438afc5bbdcec441f997af9f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 8 Mar 2020 08:40:34 -0700 Subject: [PATCH 270/388] CLN: avoid _ndarray_values, values in MultiIndex (#32452) --- pandas/core/indexes/multi.py | 46 ++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index c1efa512f326a..c22b289bb4017 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -18,7 +18,7 @@ from pandas._libs import algos as libalgos, index as libindex, lib from pandas._libs.hashtable import duplicated_int64 -from pandas._typing import AnyArrayLike, ArrayLike, Scalar +from pandas._typing import AnyArrayLike, Scalar from pandas.compat.numpy import function as nv from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.util._decorators import Appender, cache_readonly @@ -52,6 +52,7 @@ 
ensure_index, ) from pandas.core.indexes.frozen import FrozenList +from pandas.core.indexes.numeric import Int64Index import pandas.core.missing as missing from pandas.core.sorting import ( get_group_index, @@ -1180,7 +1181,7 @@ def _format_native_types(self, na_rep="nan", **kwargs): sortorder=self.sortorder, verify_integrity=False, ) - return mi.values + return mi._values def format( self, @@ -1419,7 +1420,7 @@ def is_monotonic_increasing(self) -> bool: except TypeError: # we have mixed types and np.lexsort is not happy - return Index(self.values).is_monotonic + return Index(self._values).is_monotonic @cache_readonly def is_monotonic_decreasing(self) -> bool: @@ -1612,7 +1613,7 @@ def to_flat_index(self): ('bar', 'baz'), ('bar', 'qux')], dtype='object') """ - return Index(self.values, tupleize_cols=False) + return Index(self._values, tupleize_cols=False) @property def is_all_dates(self) -> bool: @@ -1914,7 +1915,7 @@ def append(self, other): arrays.append(label.append(appended)) return MultiIndex.from_arrays(arrays, names=self.names) - to_concat = (self.values,) + tuple(k._values for k in other) + to_concat = (self._values,) + tuple(k._values for k in other) new_tuples = np.concatenate(to_concat) # if all(isinstance(x, MultiIndex) for x in other): @@ -1924,7 +1925,7 @@ def append(self, other): return Index(new_tuples) def argsort(self, *args, **kwargs) -> np.ndarray: - return self.values.argsort(*args, **kwargs) + return self._values.argsort(*args, **kwargs) @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs) def repeat(self, repeats, axis=None): @@ -2368,7 +2369,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): # let's instead try with a straight Index if method is None: - return Index(self.values).get_indexer( + return Index(self._values).get_indexer( target, method=method, limit=limit, tolerance=tolerance ) @@ -2831,7 +2832,8 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): mapper = Series(indexer) indexer = codes.take(ensure_platform_int(indexer)) result = Series(Index(indexer).isin(r).nonzero()[0]) - m = result.map(mapper)._ndarray_values + m = result.map(mapper) + m = np.asarray(m) else: m = np.zeros(len(codes), dtype=bool) @@ -2949,7 +2951,7 @@ def get_locs(self, seq): n = len(self) indexer = None - def _convert_to_indexer(r): + def _convert_to_indexer(r) -> Int64Index: # return an indexer if isinstance(r, slice): m = np.zeros(n, dtype=bool) @@ -3026,13 +3028,16 @@ def _update_indexer(idxr, indexer=indexer): if indexer is None: return np.array([], dtype=np.int64) + assert isinstance(indexer, Int64Index), type(indexer) indexer = self._reorder_indexer(seq, indexer) - return indexer._ndarray_values + return indexer._values def _reorder_indexer( - self, seq: Tuple[Union[Scalar, Iterable, AnyArrayLike], ...], indexer: ArrayLike - ) -> ArrayLike: + self, + seq: Tuple[Union[Scalar, Iterable, AnyArrayLike], ...], + indexer: Int64Index, + ) -> Int64Index: """ Reorder an indexer of a MultiIndex (self) so that the label are in the same order as given in seq @@ -3139,8 +3144,8 @@ def equals(self, other) -> bool: if self.nlevels != other.nlevels: return False - other_vals = com.values_from_object(ensure_index(other)) - return array_equivalent(self._ndarray_values, other_vals) + other_vals = com.values_from_object(other) + return array_equivalent(self._values, other_vals) if self.nlevels != other.nlevels: return False @@ -3232,7 +3237,7 @@ def union(self, other, sort=None): # TODO: Index.union returns other when `len(self)` is 0. 
uniq_tuples = lib.fast_unique_multiple( - [self._ndarray_values, other._ndarray_values], sort=sort + [self._values, other._ndarray_values], sort=sort ) return MultiIndex.from_arrays( @@ -3267,7 +3272,7 @@ def intersection(self, other, sort=False): if self.equals(other): return self - lvals = self._ndarray_values + lvals = self._values rvals = other._ndarray_values uniq_tuples = None # flag whether _inner_indexer was succesful @@ -3342,7 +3347,7 @@ def difference(self, other, sort=None): indexer = indexer.take((indexer != -1).nonzero()[0]) label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True) - difference = this.values.take(label_diff) + difference = this._values.take(label_diff) if sort is None: difference = sorted(difference) @@ -3359,7 +3364,8 @@ def difference(self, other, sort=None): def _convert_can_do_setop(self, other): result_names = self.names - if not hasattr(other, "names"): + if not isinstance(other, Index): + if len(other) == 0: other = MultiIndex( levels=[[]] * self.nlevels, @@ -3456,8 +3462,8 @@ def _wrap_joined_index(self, joined, other): @Appender(Index.isin.__doc__) def isin(self, values, level=None): if level is None: - values = MultiIndex.from_tuples(values, names=self.names).values - return algos.isin(self.values, values) + values = MultiIndex.from_tuples(values, names=self.names)._values + return algos.isin(self._values, values) else: num = self._get_level_number(level) levs = self.get_level_values(num) From 957fc3c43808773b247696fcf24eee255fd24230 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 8 Mar 2020 08:45:25 -0700 Subject: [PATCH 271/388] BUG/DEPR: loc.__setitem__ incorrectly accepting positional slices (#31840) --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/core/indexes/base.py | 10 +++++ pandas/tests/frame/conftest.py | 8 ++-- pandas/tests/frame/indexing/test_indexing.py | 10 ++--- pandas/tests/frame/methods/test_asof.py | 6 +-- pandas/tests/frame/test_analytics.py | 8 ++-- pandas/tests/frame/test_apply.py | 2 +- pandas/tests/frame/test_block_internals.py | 2 +- pandas/tests/frame/test_cumulative.py | 24 +++++------ pandas/tests/frame/test_to_csv.py | 2 +- pandas/tests/indexing/test_loc.py | 40 +++++++++++++++++++ pandas/tests/io/pytables/test_store.py | 34 ++++++++-------- pandas/tests/series/indexing/test_datetime.py | 4 +- pandas/tests/series/methods/test_asof.py | 12 +++--- pandas/tests/series/test_arithmetic.py | 2 +- 15 files changed, 108 insertions(+), 58 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index ff9655ab7f177..476ceed00fabd 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -172,7 +172,7 @@ Deprecations ~~~~~~~~~~~~ - Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated, will raise in a future version. Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`) - :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`) -- +- Setting values with ``.loc`` using a positional slice is deprecated and will raise in a future version. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5555b99a0ef88..3eab757311ccb 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3137,8 +3137,18 @@ def is_int(v): pass if com.is_null_slice(key): + # It doesn't matter if we are positional or label based indexer = key elif is_positional: + if kind == "loc": + # GH#16121, GH#24612, GH#31810 + warnings.warn( + "Slicing a positional slice with .loc is not supported, " + "and will raise TypeError in a future version. " + "Use .loc with labels or .iloc with positions instead.", + FutureWarning, + stacklevel=6, + ) indexer = key else: indexer = self.slice_indexer(start, stop, step, kind=kind) diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index 03598b6bb5eca..e89b2c6f1fec0 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -40,8 +40,8 @@ def float_frame_with_na(): """ df = DataFrame(tm.getSeriesData()) # set some NAs - df.loc[5:10] = np.nan - df.loc[15:20, -2:] = np.nan + df.iloc[5:10] = np.nan + df.iloc[15:20, -2:] = np.nan return df @@ -74,8 +74,8 @@ def bool_frame_with_na(): df = DataFrame(tm.getSeriesData()) > 0 df = df.astype(object) # set some NAs - df.loc[5:10] = np.nan - df.loc[15:20, -2:] = np.nan + df.iloc[5:10] = np.nan + df.iloc[15:20, -2:] = np.nan return df diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index b0cb988720c25..ade17860a99b7 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1209,7 +1209,7 @@ def test_setitem_frame_mixed(self, float_string_frame): piece = DataFrame( [[1.0, 2.0], [3.0, 4.0]], index=f.index[0:2], columns=["A", "B"] ) - key = (slice(None, 2), ["A", "B"]) + key = (f.index[slice(None, 2)], ["A", "B"]) f.loc[key] = piece tm.assert_almost_equal(f.loc[f.index[0:2], ["A", "B"]].values, piece.values) @@ -1220,7 +1220,7 @@ def test_setitem_frame_mixed(self, float_string_frame): index=list(f.index[0:2]) + ["foo", "bar"], columns=["A", "B"], ) - key = (slice(None, 2), ["A", "B"]) + key = (f.index[slice(None, 2)], ["A", "B"]) f.loc[key] = piece tm.assert_almost_equal( f.loc[f.index[0:2:], ["A", "B"]].values, piece.values[0:2] @@ -1230,7 +1230,7 @@ def test_setitem_frame_mixed(self, float_string_frame): f = float_string_frame.copy() piece = f.loc[f.index[:2], ["A"]] piece.index = f.index[-2:] - key = (slice(-2, None), ["A", "B"]) + key = (f.index[slice(-2, None)], ["A", "B"]) f.loc[key] = piece piece["B"] = np.nan tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values) @@ -1238,7 +1238,7 @@ def test_setitem_frame_mixed(self, float_string_frame): # ndarray f = float_string_frame.copy() piece = float_string_frame.loc[f.index[:2], ["A", "B"]] - key = (slice(-2, None), ["A", "B"]) + key = (f.index[slice(-2, None)], ["A", "B"]) f.loc[key] = piece.values tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values) @@ -1873,7 +1873,7 @@ def test_setitem_datetimelike_with_inference(self): df = DataFrame(index=date_range("20130101", periods=4)) df["A"] = np.array([1 * one_hour] * 4, dtype="m8[ns]") df.loc[:, "B"] = np.array([2 * one_hour] * 4, dtype="m8[ns]") - df.loc[:3, "C"] = np.array([3 * one_hour] * 3, dtype="m8[ns]") + df.loc[df.index[:3], "C"] = np.array([3 * one_hour] * 3, dtype="m8[ns]") df.loc[:, "D"] = np.array([4 * one_hour] * 4, dtype="m8[ns]") df.loc[df.index[:3], "E"] 
= np.array([5 * one_hour] * 3, dtype="m8[ns]") df["F"] = np.timedelta64("NaT") diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py index c68a547a3755a..70b42976c95a7 100644 --- a/pandas/tests/frame/methods/test_asof.py +++ b/pandas/tests/frame/methods/test_asof.py @@ -31,7 +31,7 @@ class TestFrameAsof: def test_basic(self, date_range_frame): df = date_range_frame N = 50 - df.loc[15:30, "A"] = np.nan + df.loc[df.index[15:30], "A"] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="25s") result = df.asof(dates) @@ -51,7 +51,7 @@ def test_basic(self, date_range_frame): def test_subset(self, date_range_frame): N = 10 df = date_range_frame.iloc[:N].copy() - df.loc[4:8, "A"] = np.nan + df.loc[df.index[4:8], "A"] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="25s") # with a subset of A should be the same @@ -159,7 +159,7 @@ def test_is_copy(self, date_range_frame): # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings df = date_range_frame N = 50 - df.loc[15:30, "A"] = np.nan + df.loc[df.index[15:30], "A"] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="25s") result = df.asof(dates) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 61802956addeb..07e30d41c216d 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -913,8 +913,8 @@ def test_sum_bools(self): def test_idxmin(self, float_frame, int_frame): frame = float_frame - frame.loc[5:10] = np.nan - frame.loc[15:20, -2:] = np.nan + frame.iloc[5:10] = np.nan + frame.iloc[15:20, -2:] = np.nan for skipna in [True, False]: for axis in [0, 1]: for df in [frame, int_frame]: @@ -928,8 +928,8 @@ def test_idxmin(self, float_frame, int_frame): def test_idxmax(self, float_frame, int_frame): frame = float_frame - frame.loc[5:10] = np.nan - frame.loc[15:20, -2:] = np.nan + frame.iloc[5:10] = np.nan + frame.iloc[15:20, -2:] = np.nan for skipna in [True, False]: for axis in [0, 1]: for df in [frame, int_frame]: diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index fe6abef97acc4..11705cd77a325 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -339,7 +339,7 @@ def test_apply_yield_list(self, float_frame): tm.assert_frame_equal(result, float_frame) def test_apply_reduce_Series(self, float_frame): - float_frame.loc[::2, "A"] = np.nan + float_frame["A"].iloc[::2] = np.nan expected = float_frame.mean(1) result = float_frame.apply(np.mean, axis=1) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index a5f5e6f36cd58..e67fef9efef6d 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -478,7 +478,7 @@ def test_convert_objects(self, float_string_frame): length = len(float_string_frame) float_string_frame["J"] = "1." 
float_string_frame["K"] = "1" - float_string_frame.loc[0:5, ["J", "K"]] = "garbled" + float_string_frame.loc[float_string_frame.index[0:5], ["J", "K"]] = "garbled" converted = float_string_frame._convert(datetime=True, numeric=True) assert converted["H"].dtype == "float64" assert converted["I"].dtype == "int64" diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py index 486cbfb2761e0..1b7e70dd28c63 100644 --- a/pandas/tests/frame/test_cumulative.py +++ b/pandas/tests/frame/test_cumulative.py @@ -23,9 +23,9 @@ def test_cumsum_corner(self): result = dm.cumsum() # noqa def test_cumsum(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan + datetime_frame.iloc[5:10, 0] = np.nan + datetime_frame.iloc[10:15, 1] = np.nan + datetime_frame.iloc[15:, 2] = np.nan # axis = 0 cumsum = datetime_frame.cumsum() @@ -46,9 +46,9 @@ def test_cumsum(self, datetime_frame): assert np.shape(cumsum_xs) == np.shape(datetime_frame) def test_cumprod(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan + datetime_frame.iloc[5:10, 0] = np.nan + datetime_frame.iloc[10:15, 1] = np.nan + datetime_frame.iloc[15:, 2] = np.nan # axis = 0 cumprod = datetime_frame.cumprod() @@ -80,9 +80,9 @@ def test_cumprod(self, datetime_frame): strict=False, ) def test_cummin(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan + datetime_frame.iloc[5:10, 0] = np.nan + datetime_frame.iloc[10:15, 1] = np.nan + datetime_frame.iloc[15:, 2] = np.nan # axis = 0 cummin = datetime_frame.cummin() @@ -108,9 +108,9 @@ def test_cummin(self, datetime_frame): strict=False, ) def test_cummax(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan + datetime_frame.iloc[5:10, 0] = np.nan + datetime_frame.iloc[10:15, 1] = np.nan + datetime_frame.iloc[15:, 2] = np.nan # axis = 0 cummax = datetime_frame.cummax() diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 86c9a98377f3f..cec2bd4b634c1 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -761,7 +761,7 @@ def create_cols(name): ) # add in some nans - df_float.loc[30:50, 1:3] = np.nan + df_float.iloc[30:50, 1:3] = np.nan # ## this is a bug in read_csv right now #### # df_dt.loc[30:50,1:3] = np.nan diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 4d042af8d59b4..3073fe085de15 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -863,6 +863,7 @@ def test_loc_setitem_empty_append_raises(self): data = [1, 2] df = DataFrame(columns=["x", "y"]) + df.index = df.index.astype(np.int64) msg = ( r"None of \[Int64Index\(\[0, 1\], dtype='int64'\)\] " r"are in the \[index\]" @@ -975,3 +976,42 @@ def test_loc_mixed_int_float(): result = ser.loc[1] assert result == 0 + + +def test_loc_with_positional_slice_deprecation(): + # GH#31840 + ser = pd.Series(range(4), index=["A", "B", "C", "D"]) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + ser.loc[:3] = 2 + + expected = pd.Series([2, 2, 2, 3], index=["A", "B", "C", "D"]) + tm.assert_series_equal(ser, expected) + + +def test_loc_slice_disallows_positional(): + # GH#16121, GH#24612, GH#31810 + dti = pd.date_range("2016-01-01", 
periods=3) + df = pd.DataFrame(np.random.random((3, 2)), index=dti) + + ser = df[0] + + msg = ( + "cannot do slice indexing on DatetimeIndex with these " + r"indexers \[1\] of type int" + ) + + for obj in [df, ser]: + with pytest.raises(TypeError, match=msg): + obj.loc[1:3] + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # GH#31840 deprecated incorrect behavior + obj.loc[1:3] = 1 + + with pytest.raises(TypeError, match=msg): + df.loc[1:3, 1] + + with tm.assert_produces_warning(FutureWarning): + # GH#31840 deprecated incorrect behavior + df.loc[1:3, 1] = 2 diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index fd585a73f6ce6..fcc48daa59a40 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -342,7 +342,7 @@ def test_repr(self, setup_path): df["timestamp2"] = Timestamp("20010103") df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0) df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0) - df.loc[3:6, ["obj1"]] = np.nan + df.loc[df.index[3:6], ["obj1"]] = np.nan df = df._consolidate()._convert(datetime=True) with catch_warnings(record=True): @@ -846,7 +846,7 @@ def test_put_mixed_type(self, setup_path): df["timestamp2"] = Timestamp("20010103") df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0) df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0) - df.loc[3:6, ["obj1"]] = np.nan + df.loc[df.index[3:6], ["obj1"]] = np.nan df = df._consolidate()._convert(datetime=True) with ensure_clean_store(setup_path) as store: @@ -1372,11 +1372,11 @@ def check_col(key, name, size): _maybe_remove(store, "df") df = tm.makeTimeDataFrame() df["string"] = "foo" - df.loc[1:4, "string"] = np.nan + df.loc[df.index[1:4], "string"] = np.nan df["string2"] = "bar" - df.loc[4:8, "string2"] = np.nan + df.loc[df.index[4:8], "string2"] = np.nan df["string3"] = "bah" - df.loc[1:, "string3"] = np.nan + df.loc[df.index[1:], "string3"] = np.nan store.append("df", df) result = store.select("df") tm.assert_frame_equal(result, df) @@ -1492,8 +1492,8 @@ def test_append_with_data_columns(self, setup_path): # data column selection with a string data_column df_new = df.copy() df_new["string"] = "foo" - df_new.loc[1:4, "string"] = np.nan - df_new.loc[5:6, "string"] = "bar" + df_new.loc[df_new.index[1:4], "string"] = np.nan + df_new.loc[df_new.index[5:6], "string"] = "bar" _maybe_remove(store, "df") store.append("df", df_new, data_columns=["string"]) result = store.select("df", "string='foo'") @@ -1574,12 +1574,12 @@ def check_col(key, name, size): # doc example df_dc = df.copy() df_dc["string"] = "foo" - df_dc.loc[4:6, "string"] = np.nan - df_dc.loc[7:9, "string"] = "bar" + df_dc.loc[df_dc.index[4:6], "string"] = np.nan + df_dc.loc[df_dc.index[7:9], "string"] = "bar" df_dc["string2"] = "cool" df_dc["datetime"] = Timestamp("20010102") df_dc = df_dc._convert(datetime=True) - df_dc.loc[3:5, ["A", "B", "datetime"]] = np.nan + df_dc.loc[df_dc.index[3:5], ["A", "B", "datetime"]] = np.nan _maybe_remove(store, "df_dc") store.append( @@ -1602,8 +1602,8 @@ def check_col(key, name, size): np.random.randn(8, 3), index=index, columns=["A", "B", "C"] ) df_dc["string"] = "foo" - df_dc.loc[4:6, "string"] = np.nan - df_dc.loc[7:9, "string"] = "bar" + df_dc.loc[df_dc.index[4:6], "string"] = np.nan + df_dc.loc[df_dc.index[7:9], "string"] = "bar" df_dc.loc[:, ["B", "C"]] = df_dc.loc[:, ["B", "C"]].abs() df_dc["string2"] = "cool" @@ -2024,7 +2024,7 @@ def test_table_mixed_dtypes(self, setup_path): df["timestamp2"] = 
Timestamp("20010103") df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0) df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0) - df.loc[3:6, ["obj1"]] = np.nan + df.loc[df.index[3:6], ["obj1"]] = np.nan df = df._consolidate()._convert(datetime=True) with ensure_clean_store(setup_path) as store: @@ -2200,7 +2200,7 @@ def test_invalid_terms(self, setup_path): df = tm.makeTimeDataFrame() df["string"] = "foo" - df.loc[0:4, "string"] = "bar" + df.loc[df.index[0:4], "string"] = "bar" store.put("df", df, format="table") @@ -3343,7 +3343,7 @@ def test_string_select(self, setup_path): # test string ==/!= df["x"] = "none" - df.loc[2:7, "x"] = "" + df.loc[df.index[2:7], "x"] = "" store.append("df", df, data_columns=["x"]) @@ -3365,7 +3365,7 @@ def test_string_select(self, setup_path): # int ==/!= df["int"] = 1 - df.loc[2:7, "int"] = 2 + df.loc[df.index[2:7], "int"] = 2 store.append("df3", df, data_columns=["int"]) @@ -3419,7 +3419,7 @@ def test_read_column(self, setup_path): # a data column with NaNs, result excludes the NaNs df3 = df.copy() df3["string"] = "foo" - df3.loc[4:6, "string"] = np.nan + df3.loc[df3.index[4:6], "string"] = np.nan store.append("df3", df3, data_columns=["string"]) result = store.select_column("df3", "string") tm.assert_almost_equal(result.values, df3["string"].values) diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index fc9d4ec5290a5..b5d04fd499c08 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -293,7 +293,7 @@ def test_getitem_setitem_datetimeindex(): result = ts.copy() result[ts.index[4:8]] = 0 - result[4:8] = ts[4:8] + result.iloc[4:8] = ts.iloc[4:8] tm.assert_series_equal(result, ts) # also test partial date slicing @@ -349,7 +349,7 @@ def test_getitem_setitem_periodindex(): result = ts.copy() result[ts.index[4:8]] = 0 - result[4:8] = ts[4:8] + result.iloc[4:8] = ts.iloc[4:8] tm.assert_series_equal(result, ts) diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/methods/test_asof.py index d6462e13b1ce7..ad5a2de6eabac 100644 --- a/pandas/tests/series/methods/test_asof.py +++ b/pandas/tests/series/methods/test_asof.py @@ -14,7 +14,7 @@ def test_basic(self): N = 50 rng = date_range("1/1/1990", periods=N, freq="53s") ts = Series(np.random.randn(N), index=rng) - ts[15:30] = np.nan + ts.iloc[15:30] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="25s") result = ts.asof(dates) @@ -39,8 +39,8 @@ def test_scalar(self): N = 30 rng = date_range("1/1/1990", periods=N, freq="53s") ts = Series(np.arange(N), index=rng) - ts[5:10] = np.NaN - ts[15:20] = np.NaN + ts.iloc[5:10] = np.NaN + ts.iloc[15:20] = np.NaN val1 = ts.asof(ts.index[7]) val2 = ts.asof(ts.index[19]) @@ -96,7 +96,7 @@ def test_periodindex(self): N = 50 rng = period_range("1/1/1990", periods=N, freq="H") ts = Series(np.random.randn(N), index=rng) - ts[15:30] = np.nan + ts.iloc[15:30] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="37min") result = ts.asof(dates) @@ -114,8 +114,8 @@ def test_periodindex(self): rs = result[mask] assert (rs == ts[lb]).all() - ts[5:10] = np.nan - ts[15:20] = np.nan + ts.iloc[5:10] = np.nan + ts.iloc[15:20] = np.nan val1 = ts.asof(ts.index[7]) val2 = ts.asof(ts.index[19]) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 10197766ce4a6..95d04c9a45d25 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -74,7 +74,7 @@ def 
test_add_series_with_period_index(self): result = ts + ts[::2] expected = ts + ts - expected[1::2] = np.nan + expected.iloc[1::2] = np.nan tm.assert_series_equal(result, expected) result = ts + _permute(ts[::2]) From 19ae087dc5a8cd161e0434a5ef43668a7fb6f44b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 8 Mar 2020 09:07:03 -0700 Subject: [PATCH 272/388] PERF: do DataFrame.op(series, axis=0) blockwise (#31296) --- asv_bench/benchmarks/arithmetic.py | 30 +++++++++ doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/_libs/ops.pyx | 2 +- pandas/core/frame.py | 14 ----- pandas/core/ops/__init__.py | 21 +++++-- pandas/core/ops/array_ops.py | 74 +++++++++++++++++++++-- pandas/tests/arithmetic/test_array_ops.py | 17 +++++- pandas/tests/frame/test_arithmetic.py | 19 ++++++ 8 files changed, 152 insertions(+), 27 deletions(-) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index d1e94f62967f4..5a8b109c21858 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -50,6 +50,36 @@ def time_frame_op_with_scalar(self, dtype, scalar, op): op(self.df, scalar) +class MixedFrameWithSeriesAxis0: + params = [ + [ + "eq", + "ne", + "lt", + "le", + "ge", + "gt", + "add", + "sub", + "div", + "floordiv", + "mul", + "pow", + ] + ] + param_names = ["opname"] + + def setup(self, opname): + arr = np.arange(10 ** 6).reshape(100, -1) + df = DataFrame(arr) + df["C"] = 1.0 + self.df = df + self.ser = df[0] + + def time_frame_op_with_series_axis0(self, opname): + getattr(self.df, opname)(self.ser, axis=0) + + class Ops: params = [[True, False], ["default", 1]] diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 476ceed00fabd..58b0d0e5b8852 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -185,7 +185,7 @@ Performance improvements - Performance improvement in :class:`Timedelta` constructor (:issue:`30543`) - Performance improvement in :class:`Timestamp` constructor (:issue:`30543`) -- +- Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`) - .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index abe1484e3763d..c0971b91a2fa1 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -100,7 +100,7 @@ def scalar_compare(object[:] values, object val, object op): @cython.wraparound(False) @cython.boundscheck(False) -def vec_compare(object[:] left, object[:] right, object op): +def vec_compare(ndarray[object] left, ndarray[object] right, object op): """ Compare the elements of `left` with the elements of `right` pointwise, with the comparison operation described by `op`. 
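For orientation, a minimal sketch of the behavior this patch makes blockwise (the frame, series, and column names below are illustrative, not taken from the patch): a flex op with axis=0 aligns on the index and applies the Series down each column, so the new fast path must reproduce the old column-by-column result.

import numpy as np
import pandas as pd

df = pd.DataFrame(np.arange(1, 13).reshape(4, 3), columns=["a", "b", "c"])
ser = df["a"]

# axis=0 broadcasts `ser` down each column; after this change the op runs
# on whole blocks instead of one column at a time, with identical output.
result = df.floordiv(ser, axis=0)
expected = pd.DataFrame({col: df[col].floordiv(ser) for col in df.columns})
pd.testing.assert_frame_equal(result, expected)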
diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 60d89b2076e92..cd5d81bc70dd9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5212,20 +5212,6 @@ def _arith_op(left, right): return new_data - def _combine_match_index(self, other: Series, func): - # at this point we have `self.index.equals(other.index)` - - if ops.should_series_dispatch(self, other, func): - # operate column-wise; avoid costly object-casting in `.values` - new_data = ops.dispatch_to_series(self, other, func) - else: - # fastpath --> operate directly on values - other_vals = other.values.reshape(-1, 1) - with np.errstate(all="ignore"): - new_data = func(self.values, other_vals) - new_data = dispatch_fill_zeros(func, self.values, other_vals, new_data) - return new_data - def _construct_result(self, result) -> "DataFrame": """ Wrap the result of an arithmetic, comparison, or logical operation. diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index d0adf2da04db3..ed779c5da6d14 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -585,7 +585,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): # DataFrame -def _combine_series_frame(left, right, func, axis: int): +def _combine_series_frame(left, right, func, axis: int, str_rep: str): """ Apply binary operator `func` to self, other using alignment and fill conventions determined by the axis argument. @@ -596,6 +596,7 @@ def _combine_series_frame(left, right, func, axis: int): right : Series func : binary operator axis : {0, 1} + str_rep : str Returns ------- @@ -603,7 +604,17 @@ def _combine_series_frame(left, right, func, axis: int): """ # We assume that self.align(other, ...) has already been called if axis == 0: - new_data = left._combine_match_index(right, func) + values = right._values + if isinstance(values, np.ndarray): + # We can operate block-wise + values = values.reshape(-1, 1) + + array_op = get_array_op(func, str_rep=str_rep) + bm = left._data.apply(array_op, right=values.T) + return type(left)(bm) + + new_data = dispatch_to_series(left, right, func) + else: new_data = dispatch_to_series(left, right, func, axis="columns") @@ -791,7 +802,9 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): raise NotImplementedError(f"fill_value {fill_value} not supported.") axis = self._get_axis_number(axis) if axis is not None else 1 - return _combine_series_frame(self, other, pass_op, axis=axis) + return _combine_series_frame( + self, other, pass_op, axis=axis, str_rep=str_rep + ) else: # in this case we always have `np.ndim(other) == 0` if fill_value is not None: @@ -826,7 +839,7 @@ def f(self, other, axis=default_axis, level=None): elif isinstance(other, ABCSeries): axis = self._get_axis_number(axis) if axis is not None else 1 - return _combine_series_frame(self, other, op, axis=axis) + return _combine_series_frame(self, other, op, axis=axis, str_rep=str_rep) else: # in this case we always have `np.ndim(other) == 0` new_data = dispatch_to_series(self, other, op, str_rep) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 2c9105c52cf9b..e285c53d9813e 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -28,7 +28,6 @@ ABCDatetimeArray, ABCExtensionArray, ABCIndex, - ABCIndexClass, ABCSeries, ABCTimedeltaArray, ) @@ -53,13 +52,15 @@ def comp_method_OBJECT_ARRAY(op, x, y): if isinstance(y, (ABCSeries, ABCIndex)): y = y.values - result = libops.vec_compare(x.ravel(), y, op) + if x.shape != y.shape: + raise 
ValueError("Shapes must match", x.shape, y.shape) + result = libops.vec_compare(x.ravel(), y.ravel(), op) else: result = libops.scalar_compare(x.ravel(), y, op) return result.reshape(x.shape) -def masked_arith_op(x, y, op): +def masked_arith_op(x: np.ndarray, y, op): """ If the given arithmetic operation fails, attempt it again on only the non-null elements of the input array(s). @@ -78,10 +79,22 @@ def masked_arith_op(x, y, op): dtype = find_common_type([x.dtype, y.dtype]) result = np.empty(x.size, dtype=dtype) + if len(x) != len(y): + if not _can_broadcast(x, y): + raise ValueError(x.shape, y.shape) + + # Call notna on pre-broadcasted y for performance + ymask = notna(y) + y = np.broadcast_to(y, x.shape) + ymask = np.broadcast_to(ymask, x.shape) + + else: + ymask = notna(y) + # NB: ravel() is only safe since y is ndarray; for e.g. PeriodIndex # we would get int64 dtype, see GH#19956 yrav = y.ravel() - mask = notna(xrav) & notna(yrav) + mask = notna(xrav) & ymask.ravel() if yrav.shape != mask.shape: # FIXME: GH#5284, GH#5035, GH#19448 @@ -211,6 +224,51 @@ def arithmetic_op(left: ArrayLike, right: Any, op, str_rep: str): return res_values +def _broadcast_comparison_op(lvalues, rvalues, op) -> np.ndarray: + """ + Broadcast a comparison operation between two 2D arrays. + + Parameters + ---------- + lvalues : np.ndarray or ExtensionArray + rvalues : np.ndarray or ExtensionArray + + Returns + ------- + np.ndarray[bool] + """ + if isinstance(rvalues, np.ndarray): + rvalues = np.broadcast_to(rvalues, lvalues.shape) + result = comparison_op(lvalues, rvalues, op) + else: + result = np.empty(lvalues.shape, dtype=bool) + for i in range(len(lvalues)): + result[i, :] = comparison_op(lvalues[i], rvalues[:, 0], op) + return result + + +def _can_broadcast(lvalues, rvalues) -> bool: + """ + Check if we can broadcast rvalues to match the shape of lvalues. + + Parameters + ---------- + lvalues : np.ndarray or ExtensionArray + rvalues : np.ndarray or ExtensionArray + + Returns + ------- + bool + """ + # We assume that lengths dont match + if lvalues.ndim == rvalues.ndim == 2: + # See if we can broadcast unambiguously + if lvalues.shape[1] == rvalues.shape[-1]: + if rvalues.shape[0] == 1: + return True + return False + + def comparison_op( left: ArrayLike, right: Any, op, str_rep: Optional[str] = None, ) -> ArrayLike: @@ -237,12 +295,16 @@ def comparison_op( # TODO: same for tuples? rvalues = np.asarray(rvalues) - if isinstance(rvalues, (np.ndarray, ABCExtensionArray, ABCIndexClass)): + if isinstance(rvalues, (np.ndarray, ABCExtensionArray)): # TODO: make this treatment consistent across ops and classes. # We are not catching all listlikes here (e.g. frozenset, tuple) # The ambiguous case is object-dtype. 
See GH#27803 if len(lvalues) != len(rvalues): - raise ValueError("Lengths must match to compare") + if _can_broadcast(lvalues, rvalues): + return _broadcast_comparison_op(lvalues, rvalues, op) + raise ValueError( + "Lengths must match to compare", lvalues.shape, rvalues.shape + ) if should_extension_dispatch(lvalues, rvalues): res_values = dispatch_to_extension_op(op, lvalues, rvalues) diff --git a/pandas/tests/arithmetic/test_array_ops.py b/pandas/tests/arithmetic/test_array_ops.py index d8aaa3183a1c6..53cb10ba9fc5e 100644 --- a/pandas/tests/arithmetic/test_array_ops.py +++ b/pandas/tests/arithmetic/test_array_ops.py @@ -4,7 +4,7 @@ import pytest import pandas._testing as tm -from pandas.core.ops.array_ops import na_logical_op +from pandas.core.ops.array_ops import comparison_op, na_logical_op def test_na_logical_op_2d(): @@ -19,3 +19,18 @@ def test_na_logical_op_2d(): result = na_logical_op(left, right, operator.or_) expected = right tm.assert_numpy_array_equal(result, expected) + + +def test_object_comparison_2d(): + left = np.arange(9).reshape(3, 3).astype(object) + right = left.T + + result = comparison_op(left, right, operator.eq) + expected = np.eye(3).astype(bool) + tm.assert_numpy_array_equal(result, expected) + + # Ensure that cython doesn't raise on non-writeable arg, which + # we can get from np.broadcast_to + right.flags.writeable = False + result = comparison_op(left, right, operator.ne) + tm.assert_numpy_array_equal(result, ~expected) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index e4be8a979a70f..92d86c8b602ff 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -348,6 +348,25 @@ def test_floordiv_axis0(self): result2 = df.floordiv(ser.values, axis=0) tm.assert_frame_equal(result2, expected) + @pytest.mark.slow + @pytest.mark.parametrize("opname", ["floordiv", "pow"]) + def test_floordiv_axis0_numexpr_path(self, opname): + # case that goes through numexpr and has to fall back to masked_arith_op + op = getattr(operator, opname) + + arr = np.arange(10 ** 6).reshape(100, -1) + df = pd.DataFrame(arr) + df["C"] = 1.0 + + ser = df[0] + result = getattr(df, opname)(ser, axis=0) + + expected = pd.DataFrame({col: op(df[col], ser) for col in df.columns}) + tm.assert_frame_equal(result, expected) + + result2 = getattr(df, opname)(ser.values, axis=0) + tm.assert_frame_equal(result2, expected) + def test_df_add_td64_columnwise(self): # GH 22534 Check that column-wise addition broadcasts correctly dti = pd.date_range("2016-01-01", periods=10) From 6f5287b91f5a6dadfbb5c2b58563d693c438ea6a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 8 Mar 2020 09:09:09 -0700 Subject: [PATCH 273/388] CLN: avoid values_from_object in nanops (#32508) --- pandas/core/nanops.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 725d1adc6d161..78313f5c3bbbf 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -32,6 +32,8 @@ ) from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna +from pandas.core.construction import extract_array + bn = import_optional_dependency("bottleneck", raise_on_missing=False, on_version="warn") _BOTTLENECK_INSTALLED = bn is not None _USE_BOTTLENECK = False @@ -284,14 +286,8 @@ def _get_values( mask = _maybe_get_mask(values, skipna, mask) - if is_datetime64tz_dtype(values): - # lib.values_from_object returns M8[ns] dtype instead of tz-aware, - # so this case must be 
handled separately from the rest - dtype = values.dtype - values = getattr(values, "_values", values) - else: - values = lib.values_from_object(values) - dtype = values.dtype + values = extract_array(values, extract_numpy=True) + dtype = values.dtype if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values): # changing timedelta64/datetime64 to int64 needs to happen after @@ -758,7 +754,7 @@ def nanvar(values, axis=None, skipna=True, ddof=1, mask=None): >>> nanops.nanvar(s) 1.0 """ - values = lib.values_from_object(values) + values = extract_array(values, extract_numpy=True) dtype = values.dtype mask = _maybe_get_mask(values, skipna, mask) if is_any_int_dtype(values): @@ -985,7 +981,7 @@ def nanskew( >>> nanops.nanskew(s) 1.7320508075688787 """ - values = lib.values_from_object(values) + values = extract_array(values, extract_numpy=True) mask = _maybe_get_mask(values, skipna, mask) if not is_float_dtype(values.dtype): values = values.astype("f8") @@ -1069,7 +1065,7 @@ def nankurt( >>> nanops.nankurt(s) -1.2892561983471076 """ - values = lib.values_from_object(values) + values = extract_array(values, extract_numpy=True) mask = _maybe_get_mask(values, skipna, mask) if not is_float_dtype(values.dtype): values = values.astype("f8") From 90bb1aa4840199d6bba5a76b438aff960f4b9497 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 8 Mar 2020 09:11:21 -0700 Subject: [PATCH 274/388] API: allow step!=1 slice with IntervalIndex (#31658) * Allow step!=1 in slicing Series with IntervalIndex * Whatsnew * doc suggestion * test for interval step * remove commented-out * reword whatsnew * disallow label-based with step!=1 * black fixup * update error message * typo fixup --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/core/indexers.py | 29 +++++++++++++++++ pandas/core/indexes/interval.py | 12 ++++++- pandas/tests/indexes/test_base.py | 3 +- .../indexing/interval/test_interval_new.py | 32 +++++++++++++++---- 5 files changed, 69 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 58b0d0e5b8852..d644a995a4876 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -67,6 +67,7 @@ Other enhancements - When writing directly to a sqlite connection :func:`to_sql` now supports the ``multi`` method (:issue:`29921`) - `OptionError` is now exposed in `pandas.errors` (:issue:`27553`) - :func:`timedelta_range` will now infer a frequency when passed ``start``, ``stop``, and ``periods`` (:issue:`32377`) +- Positional slicing on a :class:`IntervalIndex` now supports slices with ``step > 1`` (:issue:`31658`) - .. --------------------------------------------------------------------------- @@ -248,7 +249,6 @@ Strings Interval ^^^^^^^^ - - - diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index 3858e750326b4..71fd5b6aab821 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -11,6 +11,7 @@ is_array_like, is_bool_dtype, is_extension_array_dtype, + is_integer, is_integer_dtype, is_list_like, ) @@ -20,6 +21,34 @@ # Indexer Identification +def is_valid_positional_slice(slc: slice) -> bool: + """ + Check if a slice object can be interpreted as a positional indexer. + + Parameters + ---------- + slc : slice + + Returns + ------- + bool + + Notes + ----- + A valid positional slice may also be interpreted as a label-based slice + depending on the index being sliced. 
+ """ + + def is_int_or_none(val): + return val is None or is_integer(val) + + return ( + is_int_or_none(slc.start) + and is_int_or_none(slc.stop) + and is_int_or_none(slc.step) + ) + + def is_list_like_indexer(key) -> bool: """ Check if we have a list-like indexer that is *not* a NamedTuple. diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 6968837fb13e6..efdaf5a331f5f 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -39,6 +39,7 @@ from pandas.core.algorithms import take_1d from pandas.core.arrays.interval import IntervalArray, _interval_shared_docs import pandas.core.common as com +from pandas.core.indexers import is_valid_positional_slice import pandas.core.indexes.base as ibase from pandas.core.indexes.base import ( Index, @@ -866,7 +867,16 @@ def get_indexer_for(self, target: AnyArrayLike, **kwargs) -> np.ndarray: def _convert_slice_indexer(self, key: slice, kind: str): if not (key.step is None or key.step == 1): - raise ValueError("cannot support not-default step in a slice") + # GH#31658 if label-based, we require step == 1, + # if positional, we disallow float start/stop + msg = "label-based slicing with step!=1 is not supported for IntervalIndex" + if kind == "loc": + raise ValueError(msg) + elif kind == "getitem": + if not is_valid_positional_slice(key): + # i.e. this cannot be interpreted as a positional slice + raise ValueError(msg) + return super()._convert_slice_indexer(key, kind) @Appender(Index.where.__doc__) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 0c4a790646a81..73af98c16afae 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2598,7 +2598,8 @@ def test_convert_almost_null_slice(indices): key = slice(None, None, "foo") if isinstance(idx, pd.IntervalIndex): - with pytest.raises(ValueError, match="cannot support not-default step"): + msg = "label-based slicing with step!=1 is not supported for IntervalIndex" + with pytest.raises(ValueError, match=msg): idx._convert_slice_indexer(key, "loc") else: msg = "'>=' not supported between instances of 'str' and 'int'" diff --git a/pandas/tests/indexing/interval/test_interval_new.py b/pandas/tests/indexing/interval/test_interval_new.py index 43036fbbd9844..03c3034772bc6 100644 --- a/pandas/tests/indexing/interval/test_interval_new.py +++ b/pandas/tests/indexing/interval/test_interval_new.py @@ -128,9 +128,6 @@ def test_loc_with_slices(self): with pytest.raises(NotImplementedError, match=msg): s[Interval(3, 4, closed="left") :] - # TODO with non-existing intervals ? 
- # s.loc[Interval(-1, 0):Interval(2, 3)] - # slice of scalar expected = s.iloc[:3] @@ -143,9 +140,32 @@ def test_loc_with_slices(self): tm.assert_series_equal(expected, s[:2.5]) tm.assert_series_equal(expected, s[0.1:2.5]) - # slice of scalar with step != 1 - with pytest.raises(ValueError): - s[0:4:2] + def test_slice_step_ne1(self): + # GH#31658 slice of scalar with step != 1 + s = self.s + expected = s.iloc[0:4:2] + + result = s[0:4:2] + tm.assert_series_equal(result, expected) + + result2 = s[0:4][::2] + tm.assert_series_equal(result2, expected) + + def test_slice_float_start_stop(self): + # GH#31658 slicing with integers is positional, with floats is not + # supported + ser = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6))) + + msg = "label-based slicing with step!=1 is not supported for IntervalIndex" + with pytest.raises(ValueError, match=msg): + ser[1.5:9.5:2] + + def test_slice_interval_step(self): + # GH#31658 allows for integer step!=1, not Interval step + s = self.s + msg = "label-based slicing with step!=1 is not supported for IntervalIndex" + with pytest.raises(ValueError, match=msg): + s[0 : 4 : Interval(0, 1)] def test_loc_with_overlap(self): From 08c65973643b73e877133d1a7391de7092f4b267 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 8 Mar 2020 09:13:46 -0700 Subject: [PATCH 275/388] CLN: remove check_series_type (#32513) --- pandas/_testing.py | 58 +++++++++++--------------- pandas/tests/io/pytables/test_store.py | 2 +- 2 files changed, 25 insertions(+), 35 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index 33ec4e4886aa6..5e94ac3b3d108 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -34,6 +34,7 @@ is_interval_dtype, is_list_like, is_number, + is_numeric_dtype, is_period_dtype, is_sequence, is_timedelta64_dtype, @@ -1064,7 +1065,6 @@ def assert_series_equal( right, check_dtype=True, check_index_type="equiv", - check_series_type=True, check_less_precise=False, check_names=True, check_exact=False, @@ -1085,8 +1085,6 @@ def assert_series_equal( check_index_type : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type are identical. - check_series_type : bool, default True - Whether to check the Series class is identical. check_less_precise : bool or int, default False Specify comparison precision. Only used when check_exact is False. 5 digits (False) or 3 digits (True) after decimal points are compared. @@ -1118,11 +1116,10 @@ def assert_series_equal( # instance validation _check_isinstance(left, right, Series) - if check_series_type: - # ToDo: There are some tests using rhs is sparse - # lhs is dense. Should use assert_class_equal in future - assert isinstance(left, type(right)) - # assert_class_equal(left, right, obj=obj) + # TODO: There are some tests using rhs is sparse + # lhs is dense. Should use assert_class_equal in future + assert isinstance(left, type(right)) + # assert_class_equal(left, right, obj=obj) # length comparison if len(left) != len(right): @@ -1147,8 +1144,8 @@ def assert_series_equal( # is False. 
We'll still raise if only one is a `Categorical`, # regardless of `check_categorical` if ( - is_categorical_dtype(left) - and is_categorical_dtype(right) + is_categorical_dtype(left.dtype) + and is_categorical_dtype(right.dtype) and not check_categorical ): pass @@ -1156,38 +1153,31 @@ def assert_series_equal( assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}") if check_exact: + if not is_numeric_dtype(left.dtype): + raise AssertionError("check_exact may only be used with numeric Series") + assert_numpy_array_equal( - left._internal_get_values(), - right._internal_get_values(), - check_dtype=check_dtype, - obj=str(obj), + left._values, right._values, check_dtype=check_dtype, obj=str(obj) ) - elif check_datetimelike_compat: + elif check_datetimelike_compat and ( + needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype) + ): # we want to check only if we have compat dtypes # e.g. integer and M|m are NOT compat, but we can simply check # the values in that case - if needs_i8_conversion(left) or needs_i8_conversion(right): - - # datetimelike may have different objects (e.g. datetime.datetime - # vs Timestamp) but will compare equal - if not Index(left._values).equals(Index(right._values)): - msg = ( - f"[datetimelike_compat=True] {left._values} " - f"is not equal to {right._values}." - ) - raise AssertionError(msg) - else: - assert_numpy_array_equal( - left._internal_get_values(), - right._internal_get_values(), - check_dtype=check_dtype, + + # datetimelike may have different objects (e.g. datetime.datetime + # vs Timestamp) but will compare equal + if not Index(left._values).equals(Index(right._values)): + msg = ( + f"[datetimelike_compat=True] {left._values} " + f"is not equal to {right._values}." ) - elif is_interval_dtype(left) or is_interval_dtype(right): + raise AssertionError(msg) + elif is_interval_dtype(left.dtype) or is_interval_dtype(right.dtype): assert_interval_array_equal(left.array, right.array) - elif is_extension_array_dtype(left.dtype) and is_datetime64tz_dtype(left.dtype): + elif is_datetime64tz_dtype(left.dtype): # .values is an ndarray, but ._values is the ExtensionArray. 
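A minimal sketch of the stricter check_exact behavior in this cleanup (the series contents are illustrative): exact comparison now goes through ._values and is limited to numeric data.

import pandas as pd
import pandas._testing as tm

# Numeric data still supports exact comparison.
tm.assert_series_equal(pd.Series([0.1, 0.2]), pd.Series([0.1, 0.2]), check_exact=True)

try:
    tm.assert_series_equal(pd.Series(["a"]), pd.Series(["a"]), check_exact=True)
except AssertionError as err:
    # per this patch, non-numeric input is rejected outright
    assert "check_exact may only be used with numeric Series" in str(err)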
- # TODO: Use .array - assert is_extension_array_dtype(right.dtype) assert_extension_array_equal(left._values, right._values) elif ( is_extension_array_dtype(left) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index fcc48daa59a40..34f6a73812c97 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -2312,7 +2312,7 @@ def test_index_types(self, setup_path): values = np.random.randn(2) func = lambda l, r: tm.assert_series_equal( - l, r, check_dtype=True, check_index_type=True, check_series_type=True + l, r, check_dtype=True, check_index_type=True ) with catch_warnings(record=True): From edb863e16bb10bb1af6470d9ce9c63636054052a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 8 Mar 2020 09:14:49 -0700 Subject: [PATCH 276/388] CLN: avoid values_from_object in construction (#32504) --- pandas/core/internals/construction.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 57ed2555761be..ab363e10eb098 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -36,7 +36,7 @@ from pandas.core import algorithms, common as com from pandas.core.arrays import Categorical -from pandas.core.construction import sanitize_array +from pandas.core.construction import extract_array, sanitize_array from pandas.core.indexes import base as ibase from pandas.core.indexes.api import ( Index, @@ -519,7 +519,7 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): else: indexer = indexer_cache[id(index)] = index.get_indexer(columns) - values = com.values_from_object(s) + values = extract_array(s, extract_numpy=True) aligned_values.append(algorithms.take_1d(values, indexer)) values = np.vstack(aligned_values) From a3985f8e3745a8f691d4c80bee2d65dcf355df6b Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Mon, 9 Mar 2020 04:29:29 +0100 Subject: [PATCH 277/388] Fix failure to convert string "uint64" to NaN (#32541) --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/_libs/lib.pyx | 2 -- pandas/tests/dtypes/test_inference.py | 7 +++++++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index d644a995a4876..e745bf3f5feed 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -231,7 +231,7 @@ Timezones Numeric ^^^^^^^ - Bug in :meth:`DataFrame.floordiv` with ``axis=0`` not treating division-by-zero like :meth:`Series.floordiv` (:issue:`31271`) -- +- Bug in :meth:`to_numeric` with string argument ``"uint64"`` and ``errors="coerce"`` silently fails (:issue:`32394`) - Conversion diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 61d6a660a0357..feccb447c5112 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2024,8 +2024,6 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, except (TypeError, ValueError) as err: if not seen.coerce_numeric: raise type(err)(f"{err} at position {i}") - elif "uint64" in str(err): # Exception from check functions. 
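The user-visible effect of deleting the special-cased re-raise below, as a minimal sketch: the literal string "uint64" used to match the removed branch because the parse error message contains that substring, so errors="coerce" raised instead of coercing.

import numpy as np
import pandas as pd

result = pd.to_numeric(pd.Series(["uint64"]), errors="coerce")
assert np.isnan(result.iloc[0])  # coerced to NaN rather than raising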
- raise seen.saw_null() floats[i] = NaN diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 48ae1f67297af..b01747ef010c1 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -507,6 +507,13 @@ def test_convert_numeric_int64_uint64(self, case, coerce): result = lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce) tm.assert_almost_equal(result, expected) + def test_convert_numeric_string_uint64(self): + # GH32394 + result = lib.maybe_convert_numeric( + np.array(["uint64"], dtype=object), set(), coerce_numeric=True + ) + assert np.isnan(result) + @pytest.mark.parametrize("value", [-(2 ** 63) - 1, 2 ** 64]) def test_convert_int_overflow(self, value): # see gh-18584 From 3e1275e74f819c31a2707eb337ca0a9e54c8c089 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 9 Mar 2020 06:47:44 -0700 Subject: [PATCH 278/388] CLN: to_dense->np.asarray (#32545) --- pandas/core/arrays/categorical.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 40a169d03f39c..92859479ec73f 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1728,7 +1728,8 @@ def fillna(self, value=None, method=None, limit=None): # pad / bfill if method is not None: - values = self.to_dense().reshape(-1, len(self)) + # TODO: dispatch when self.categories is EA-dtype + values = np.asarray(self).reshape(-1, len(self)) values = interpolate_2d(values, method, 0, None, value).astype( self.categories.dtype )[0] From 787dc8aa90813bb8509ffda84cd2f828d772c7fb Mon Sep 17 00:00:00 2001 From: Anna Daglis <40292327+AnnaDaglis@users.noreply.github.com> Date: Mon, 9 Mar 2020 15:42:16 +0000 Subject: [PATCH 279/388] BUG: Fixed bug, where pandas._libs.lib.maybe_convert_objects function improperly handled arrays with bools and NaNs (#32242) * BUG: Fixed bug, where pandas._libs.lib.maybe_convert_objects function improperly handled arrays with bools and NaNs --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/_libs/lib.pyx | 2 +- pandas/tests/dtypes/test_inference.py | 7 +++++ pandas/tests/indexes/test_base.py | 11 ++++++++ .../tests/series/methods/test_value_counts.py | 26 +++++++++++++++++++ 5 files changed, 46 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 808e6ae709ce9..eec471f989037 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -24,6 +24,7 @@ Fixed regressions - Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) - Fixed regression in :meth:`GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`) - Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`) +- Fixed bug in the repr of an object-dtype ``Index`` with bools and missing values (:issue:`32146`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index feccb447c5112..db1abae4d6ff3 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2296,7 +2296,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, return uints else: return ints - elif seen.is_bool: + elif seen.is_bool and not seen.nan_: return bools.view(np.bool_) return objects diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index b01747ef010c1..e0fef833d4ced 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -575,6 +575,13 @@ def test_maybe_convert_objects_nullable_integer(self, exp): tm.assert_extension_array_equal(result, exp) + def test_maybe_convert_objects_bool_nan(self): + # GH32146 + ind = pd.Index([True, False, np.nan], dtype=object) + exp = np.array([True, False, np.nan], dtype=object) + out = lib.maybe_convert_objects(ind.values, safe=1) + tm.assert_numpy_array_equal(out, exp) + def test_mixed_dtypes_remain_object_array(self): # GH14956 array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], dtype=object) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 73af98c16afae..9e0cef375fea3 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2458,6 +2458,17 @@ def test_intersect_str_dates(self): expected = Index([], dtype=object) tm.assert_index_equal(result, expected) + def test_index_repr_bool_nan(self): + # GH32146 + arr = Index([True, False, np.nan], dtype=object) + exp1 = arr.format() + out1 = ["True", "False", "NaN"] + assert out1 == exp1 + + exp2 = repr(arr) + out2 = "Index([True, False, nan], dtype='object')" + assert out2 == exp2 + class TestIndexUtils: @pytest.mark.parametrize( diff --git a/pandas/tests/series/methods/test_value_counts.py b/pandas/tests/series/methods/test_value_counts.py index fdb35befeb0c2..f97362ce9c2a9 100644 --- a/pandas/tests/series/methods/test_value_counts.py +++ b/pandas/tests/series/methods/test_value_counts.py @@ -1,4 +1,5 @@ import numpy as np +import pytest import pandas as pd from pandas import Categorical, CategoricalIndex, Series @@ -177,3 +178,28 @@ def test_value_counts_categorical_with_nan(self): exp = Series([2, 1, 3], index=CategoricalIndex(["a", "b", np.nan])) res = ser.value_counts(dropna=False, sort=False) tm.assert_series_equal(res, exp) + + @pytest.mark.parametrize( + "ser, dropna, exp", + [ + ( + pd.Series([False, True, True, pd.NA]), + False, + pd.Series([2, 1, 1], index=[True, False, pd.NA]), + ), + ( + pd.Series([False, True, True, pd.NA]), + True, + pd.Series([2, 1], index=[True, False]), + ), + ( + pd.Series(range(3), index=[True, False, np.nan]).index, + False, + pd.Series([1, 1, 1], index=[True, False, pd.NA]), + ), + ], + ) + def test_value_counts_bool_with_nan(self, ser, dropna, exp): + # GH32146 + out = ser.value_counts(dropna=dropna) + tm.assert_series_equal(out, exp) From 3e8f32c217698a5eb8e269ee1da590f1cc2771b7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 9 Mar 2020 13:17:15 -0700 Subject: [PATCH 280/388] TST: separate out pd.crosstab tests from test_pivot (#32536) --- pandas/tests/reshape/test_crosstab.py | 700 +++++++++++++++++++++++++ pandas/tests/reshape/test_pivot.py | 707 +------------------------- 2 files changed, 701 insertions(+), 706 deletions(-) create mode 100644 pandas/tests/reshape/test_crosstab.py diff --git a/pandas/tests/reshape/test_crosstab.py 
b/pandas/tests/reshape/test_crosstab.py new file mode 100644 index 0000000000000..8795af2e11122 --- /dev/null +++ b/pandas/tests/reshape/test_crosstab.py @@ -0,0 +1,700 @@ +import numpy as np +import pytest + +from pandas import CategoricalIndex, DataFrame, Index, MultiIndex, Series, crosstab +import pandas._testing as tm + + +class TestCrosstab: + def setup_method(self, method): + df = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + self.df = df.append(df, ignore_index=True) + + def test_crosstab_single(self): + df = self.df + result = crosstab(df["A"], df["C"]) + expected = df.groupby(["A", "C"]).size().unstack() + tm.assert_frame_equal(result, expected.fillna(0).astype(np.int64)) + + def test_crosstab_multiple(self): + df = self.df + + result = crosstab(df["A"], [df["B"], df["C"]]) + expected = df.groupby(["A", "B", "C"]).size() + expected = expected.unstack("B").unstack("C").fillna(0).astype(np.int64) + tm.assert_frame_equal(result, expected) + + result = crosstab([df["B"], df["C"]], df["A"]) + expected = df.groupby(["B", "C", "A"]).size() + expected = expected.unstack("A").fillna(0).astype(np.int64) + tm.assert_frame_equal(result, expected) + + def test_crosstab_ndarray(self): + a = np.random.randint(0, 5, size=100) + b = np.random.randint(0, 3, size=100) + c = np.random.randint(0, 10, size=100) + + df = DataFrame({"a": a, "b": b, "c": c}) + + result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c")) + expected = crosstab(df["a"], [df["b"], df["c"]]) + tm.assert_frame_equal(result, expected) + + result = crosstab([b, c], a, colnames=["a"], rownames=("b", "c")) + expected = crosstab([df["b"], df["c"]], df["a"]) + tm.assert_frame_equal(result, expected) + + # assign arbitrary names + result = crosstab(self.df["A"].values, self.df["C"].values) + assert result.index.name == "row_0" + assert result.columns.name == "col_0" + + def test_crosstab_non_aligned(self): + # GH 17005 + a = Series([0, 1, 1], index=["a", "b", "c"]) + b = Series([3, 4, 3, 4, 3], index=["a", "b", "c", "d", "f"]) + c = np.array([3, 4, 3]) + + expected = DataFrame( + [[1, 0], [1, 1]], + index=Index([0, 1], name="row_0"), + columns=Index([3, 4], name="col_0"), + ) + + result = crosstab(a, b) + tm.assert_frame_equal(result, expected) + + result = crosstab(a, c) + tm.assert_frame_equal(result, expected) + + def test_crosstab_margins(self): + a = np.random.randint(0, 7, size=100) + b = np.random.randint(0, 3, size=100) + c = np.random.randint(0, 5, size=100) + + df = DataFrame({"a": a, "b": b, "c": c}) + + result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c"), margins=True) + + assert result.index.names == ("a",) + assert result.columns.names == ["b", "c"] + + all_cols = result["All", ""] + exp_cols = df.groupby(["a"]).size().astype("i8") + # to keep index.name + exp_margin = Series([len(df)], index=Index(["All"], name="a")) + exp_cols = exp_cols.append(exp_margin) + exp_cols.name = ("All", "") + + tm.assert_series_equal(all_cols, exp_cols) + + all_rows = result.loc["All"] + exp_rows = df.groupby(["b", "c"]).size().astype("i8") + exp_rows = exp_rows.append(Series([len(df)], index=[("All", "")])) 
+ exp_rows.name = "All" + + exp_rows = exp_rows.reindex(all_rows.index) + exp_rows = exp_rows.fillna(0).astype(np.int64) + tm.assert_series_equal(all_rows, exp_rows) + + def test_crosstab_margins_set_margin_name(self): + # GH 15972 + a = np.random.randint(0, 7, size=100) + b = np.random.randint(0, 3, size=100) + c = np.random.randint(0, 5, size=100) + + df = DataFrame({"a": a, "b": b, "c": c}) + + result = crosstab( + a, + [b, c], + rownames=["a"], + colnames=("b", "c"), + margins=True, + margins_name="TOTAL", + ) + + assert result.index.names == ("a",) + assert result.columns.names == ["b", "c"] + + all_cols = result["TOTAL", ""] + exp_cols = df.groupby(["a"]).size().astype("i8") + # to keep index.name + exp_margin = Series([len(df)], index=Index(["TOTAL"], name="a")) + exp_cols = exp_cols.append(exp_margin) + exp_cols.name = ("TOTAL", "") + + tm.assert_series_equal(all_cols, exp_cols) + + all_rows = result.loc["TOTAL"] + exp_rows = df.groupby(["b", "c"]).size().astype("i8") + exp_rows = exp_rows.append(Series([len(df)], index=[("TOTAL", "")])) + exp_rows.name = "TOTAL" + + exp_rows = exp_rows.reindex(all_rows.index) + exp_rows = exp_rows.fillna(0).astype(np.int64) + tm.assert_series_equal(all_rows, exp_rows) + + msg = "margins_name argument must be a string" + for margins_name in [666, None, ["a", "b"]]: + with pytest.raises(ValueError, match=msg): + crosstab( + a, + [b, c], + rownames=["a"], + colnames=("b", "c"), + margins=True, + margins_name=margins_name, + ) + + def test_crosstab_pass_values(self): + a = np.random.randint(0, 7, size=100) + b = np.random.randint(0, 3, size=100) + c = np.random.randint(0, 5, size=100) + values = np.random.randn(100) + + table = crosstab( + [a, b], c, values, aggfunc=np.sum, rownames=["foo", "bar"], colnames=["baz"] + ) + + df = DataFrame({"foo": a, "bar": b, "baz": c, "values": values}) + + expected = df.pivot_table( + "values", index=["foo", "bar"], columns="baz", aggfunc=np.sum + ) + tm.assert_frame_equal(table, expected) + + def test_crosstab_dropna(self): + # GH 3820 + a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object) + b = np.array(["one", "one", "two", "one", "two", "two", "two"], dtype=object) + c = np.array( + ["dull", "dull", "dull", "dull", "dull", "shiny", "shiny"], dtype=object + ) + res = crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"], dropna=False) + m = MultiIndex.from_tuples( + [("one", "dull"), ("one", "shiny"), ("two", "dull"), ("two", "shiny")], + names=["b", "c"], + ) + tm.assert_index_equal(res.columns, m) + + def test_crosstab_no_overlap(self): + # GS 10291 + + s1 = Series([1, 2, 3], index=[1, 2, 3]) + s2 = Series([4, 5, 6], index=[4, 5, 6]) + + actual = crosstab(s1, s2) + expected = DataFrame() + + tm.assert_frame_equal(actual, expected) + + def test_margin_dropna(self): + # GH 12577 + # pivot_table counts null into margin ('All') + # when margins=true and dropna=true + + df = DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]}) + actual = crosstab(df.a, df.b, margins=True, dropna=True) + expected = DataFrame([[1, 0, 1], [1, 3, 4], [2, 3, 5]]) + expected.index = Index([1.0, 2.0, "All"], name="a") + expected.columns = Index([3, 4, "All"], name="b") + tm.assert_frame_equal(actual, expected) + + df = DataFrame( + {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]} + ) + actual = crosstab(df.a, df.b, margins=True, dropna=True) + expected = DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]]) + expected.index = Index([1.0, 2.0, "All"], name="a") + expected.columns = 
Index([3.0, 4.0, "All"], name="b") + tm.assert_frame_equal(actual, expected) + + df = DataFrame( + {"a": [1, np.nan, np.nan, np.nan, np.nan, 2], "b": [3, 3, 4, 4, 4, 4]} + ) + actual = crosstab(df.a, df.b, margins=True, dropna=True) + expected = DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]]) + expected.index = Index([1.0, 2.0, "All"], name="a") + expected.columns = Index([3, 4, "All"], name="b") + tm.assert_frame_equal(actual, expected) + + # GH 12642 + # _add_margins raises KeyError: Level None not found + # when margins=True and dropna=False + df = DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]}) + actual = crosstab(df.a, df.b, margins=True, dropna=False) + expected = DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]]) + expected.index = Index([1.0, 2.0, "All"], name="a") + expected.columns = Index([3, 4, "All"], name="b") + tm.assert_frame_equal(actual, expected) + + df = DataFrame( + {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]} + ) + actual = crosstab(df.a, df.b, margins=True, dropna=False) + expected = DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]]) + expected.index = Index([1.0, 2.0, "All"], name="a") + expected.columns = Index([3.0, 4.0, "All"], name="b") + tm.assert_frame_equal(actual, expected) + + a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object) + b = np.array(["one", "one", "two", "one", "two", np.nan, "two"], dtype=object) + c = np.array( + ["dull", "dull", "dull", "dull", "dull", "shiny", "shiny"], dtype=object + ) + + actual = crosstab( + a, [b, c], rownames=["a"], colnames=["b", "c"], margins=True, dropna=False + ) + m = MultiIndex.from_arrays( + [ + ["one", "one", "two", "two", "All"], + ["dull", "shiny", "dull", "shiny", ""], + ], + names=["b", "c"], + ) + expected = DataFrame( + [[1, 0, 1, 0, 2], [2, 0, 1, 1, 5], [3, 0, 2, 1, 7]], columns=m + ) + expected.index = Index(["bar", "foo", "All"], name="a") + tm.assert_frame_equal(actual, expected) + + actual = crosstab( + [a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=False + ) + m = MultiIndex.from_arrays( + [["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]], + names=["a", "b"], + ) + expected = DataFrame( + [[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 2, 7]], index=m + ) + expected.columns = Index(["dull", "shiny", "All"], name="c") + tm.assert_frame_equal(actual, expected) + + actual = crosstab( + [a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=True + ) + m = MultiIndex.from_arrays( + [["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]], + names=["a", "b"], + ) + expected = DataFrame( + [[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 1, 6]], index=m + ) + expected.columns = Index(["dull", "shiny", "All"], name="c") + tm.assert_frame_equal(actual, expected) + + def test_crosstab_normalize(self): + # Issue 12578 + df = DataFrame( + {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]} + ) + + rindex = Index([1, 2], name="a") + cindex = Index([3, 4], name="b") + full_normal = DataFrame([[0.2, 0], [0.2, 0.6]], index=rindex, columns=cindex) + row_normal = DataFrame([[1.0, 0], [0.25, 0.75]], index=rindex, columns=cindex) + col_normal = DataFrame([[0.5, 0], [0.5, 1.0]], index=rindex, columns=cindex) + + # Check all normalize args + tm.assert_frame_equal(crosstab(df.a, df.b, normalize="all"), full_normal) + tm.assert_frame_equal(crosstab(df.a, df.b, normalize=True), full_normal) + tm.assert_frame_equal(crosstab(df.a, df.b, normalize="index"), row_normal) + 
tm.assert_frame_equal(crosstab(df.a, df.b, normalize="columns"), col_normal) + tm.assert_frame_equal( + crosstab(df.a, df.b, normalize=1), + crosstab(df.a, df.b, normalize="columns"), + ) + tm.assert_frame_equal( + crosstab(df.a, df.b, normalize=0), crosstab(df.a, df.b, normalize="index"), + ) + + row_normal_margins = DataFrame( + [[1.0, 0], [0.25, 0.75], [0.4, 0.6]], + index=Index([1, 2, "All"], name="a", dtype="object"), + columns=Index([3, 4], name="b", dtype="object"), + ) + col_normal_margins = DataFrame( + [[0.5, 0, 0.2], [0.5, 1.0, 0.8]], + index=Index([1, 2], name="a", dtype="object"), + columns=Index([3, 4, "All"], name="b", dtype="object"), + ) + + all_normal_margins = DataFrame( + [[0.2, 0, 0.2], [0.2, 0.6, 0.8], [0.4, 0.6, 1]], + index=Index([1, 2, "All"], name="a", dtype="object"), + columns=Index([3, 4, "All"], name="b", dtype="object"), + ) + tm.assert_frame_equal( + crosstab(df.a, df.b, normalize="index", margins=True), row_normal_margins + ) + tm.assert_frame_equal( + crosstab(df.a, df.b, normalize="columns", margins=True), col_normal_margins, + ) + tm.assert_frame_equal( + crosstab(df.a, df.b, normalize=True, margins=True), all_normal_margins + ) + + # Test arrays + crosstab( + [np.array([1, 1, 2, 2]), np.array([1, 2, 1, 2])], np.array([1, 2, 1, 2]) + ) + + # Test with aggfunc + norm_counts = DataFrame( + [[0.25, 0, 0.25], [0.25, 0.5, 0.75], [0.5, 0.5, 1]], + index=Index([1, 2, "All"], name="a", dtype="object"), + columns=Index([3, 4, "All"], name="b"), + ) + test_case = crosstab( + df.a, df.b, df.c, aggfunc="count", normalize="all", margins=True + ) + tm.assert_frame_equal(test_case, norm_counts) + + df = DataFrame( + {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [0, 4, np.nan, 3, 3]} + ) + + norm_sum = DataFrame( + [[0, 0, 0.0], [0.4, 0.6, 1], [0.4, 0.6, 1]], + index=Index([1, 2, "All"], name="a", dtype="object"), + columns=Index([3, 4, "All"], name="b", dtype="object"), + ) + test_case = crosstab( + df.a, df.b, df.c, aggfunc=np.sum, normalize="all", margins=True + ) + tm.assert_frame_equal(test_case, norm_sum) + + def test_crosstab_with_empties(self): + # Check handling of empties + df = DataFrame( + { + "a": [1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4], + "c": [np.nan, np.nan, np.nan, np.nan, np.nan], + } + ) + + empty = DataFrame( + [[0.0, 0.0], [0.0, 0.0]], + index=Index([1, 2], name="a", dtype="int64"), + columns=Index([3, 4], name="b"), + ) + + for i in [True, "index", "columns"]: + calculated = crosstab(df.a, df.b, values=df.c, aggfunc="count", normalize=i) + tm.assert_frame_equal(empty, calculated) + + nans = DataFrame( + [[0.0, np.nan], [0.0, 0.0]], + index=Index([1, 2], name="a", dtype="int64"), + columns=Index([3, 4], name="b"), + ) + + calculated = crosstab(df.a, df.b, values=df.c, aggfunc="count", normalize=False) + tm.assert_frame_equal(nans, calculated) + + def test_crosstab_errors(self): + # Issue 12578 + + df = DataFrame( + {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]} + ) + + error = "values cannot be used without an aggfunc." 
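As context for the error string defined just above, a minimal sketch of the crosstab contract these relocated tests exercise (the data is illustrative): values and aggfunc must be supplied together.

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 2], "b": [3, 3, 4], "c": [1.0, 2.0, 3.0]})

pd.crosstab(df.a, df.b, values=df.c, aggfunc="mean")  # aggregated table

try:
    pd.crosstab(df.a, df.b, values=df.c)  # values without aggfunc
except ValueError as err:
    assert "values cannot be used without an aggfunc" in str(err)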
+ with pytest.raises(ValueError, match=error): + crosstab(df.a, df.b, values=df.c) + + error = "aggfunc cannot be used without values" + with pytest.raises(ValueError, match=error): + crosstab(df.a, df.b, aggfunc=np.mean) + + error = "Not a valid normalize argument" + with pytest.raises(ValueError, match=error): + crosstab(df.a, df.b, normalize="42") + + with pytest.raises(ValueError, match=error): + crosstab(df.a, df.b, normalize=42) + + error = "Not a valid margins argument" + with pytest.raises(ValueError, match=error): + crosstab(df.a, df.b, normalize="all", margins=42) + + def test_crosstab_with_categorial_columns(self): + # GH 8860 + df = DataFrame( + { + "MAKE": ["Honda", "Acura", "Tesla", "Honda", "Honda", "Acura"], + "MODEL": ["Sedan", "Sedan", "Electric", "Pickup", "Sedan", "Sedan"], + } + ) + categories = ["Sedan", "Electric", "Pickup"] + df["MODEL"] = df["MODEL"].astype("category").cat.set_categories(categories) + result = crosstab(df["MAKE"], df["MODEL"]) + + expected_index = Index(["Acura", "Honda", "Tesla"], name="MAKE") + expected_columns = CategoricalIndex( + categories, categories=categories, ordered=False, name="MODEL" + ) + expected_data = [[2, 0, 0], [2, 0, 1], [0, 1, 0]] + expected = DataFrame( + expected_data, index=expected_index, columns=expected_columns + ) + tm.assert_frame_equal(result, expected) + + def test_crosstab_with_numpy_size(self): + # GH 4003 + df = DataFrame( + { + "A": ["one", "one", "two", "three"] * 6, + "B": ["A", "B", "C"] * 8, + "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4, + "D": np.random.randn(24), + "E": np.random.randn(24), + } + ) + result = crosstab( + index=[df["A"], df["B"]], + columns=[df["C"]], + margins=True, + aggfunc=np.size, + values=df["D"], + ) + expected_index = MultiIndex( + levels=[["All", "one", "three", "two"], ["", "A", "B", "C"]], + codes=[[1, 1, 1, 2, 2, 2, 3, 3, 3, 0], [1, 2, 3, 1, 2, 3, 1, 2, 3, 0]], + names=["A", "B"], + ) + expected_column = Index(["bar", "foo", "All"], dtype="object", name="C") + expected_data = np.array( + [ + [2.0, 2.0, 4.0], + [2.0, 2.0, 4.0], + [2.0, 2.0, 4.0], + [2.0, np.nan, 2.0], + [np.nan, 2.0, 2.0], + [2.0, np.nan, 2.0], + [np.nan, 2.0, 2.0], + [2.0, np.nan, 2.0], + [np.nan, 2.0, 2.0], + [12.0, 12.0, 24.0], + ] + ) + expected = DataFrame( + expected_data, index=expected_index, columns=expected_column + ) + tm.assert_frame_equal(result, expected) + + def test_crosstab_dup_index_names(self): + # GH 13279 + s = Series(range(3), name="foo") + + result = crosstab(s, s) + expected_index = Index(range(3), name="foo") + expected = DataFrame( + np.eye(3, dtype=np.int64), index=expected_index, columns=expected_index + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("names", [["a", ("b", "c")], [("a", "b"), "c"]]) + def test_crosstab_tuple_name(self, names): + s1 = Series(range(3), name=names[0]) + s2 = Series(range(1, 4), name=names[1]) + + mi = MultiIndex.from_arrays([range(3), range(1, 4)], names=names) + expected = Series(1, index=mi).unstack(1, fill_value=0) + + result = crosstab(s1, s2) + tm.assert_frame_equal(result, expected) + + def test_crosstab_both_tuple_names(self): + # GH 18321 + s1 = Series(range(3), name=("a", "b")) + s2 = Series(range(3), name=("c", "d")) + + expected = DataFrame( + np.eye(3, dtype="int64"), + index=Index(range(3), name=("a", "b")), + columns=Index(range(3), name=("c", "d")), + ) + result = crosstab(s1, s2) + tm.assert_frame_equal(result, expected) + + def test_crosstab_unsorted_order(self): + df = DataFrame({"b": [3, 1, 2], "a": [5, 4, 
6]}, index=["C", "A", "B"]) + result = crosstab(df.index, [df.b, df.a]) + e_idx = Index(["A", "B", "C"], name="row_0") + e_columns = MultiIndex.from_tuples([(1, 4), (2, 6), (3, 5)], names=["b", "a"]) + expected = DataFrame( + [[1, 0, 0], [0, 1, 0], [0, 0, 1]], index=e_idx, columns=e_columns + ) + tm.assert_frame_equal(result, expected) + + def test_crosstab_normalize_multiple_columns(self): + # GH 15150 + df = DataFrame( + { + "A": ["one", "one", "two", "three"] * 6, + "B": ["A", "B", "C"] * 8, + "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4, + "D": [0] * 24, + "E": [0] * 24, + } + ) + result = crosstab( + [df.A, df.B], + df.C, + values=df.D, + aggfunc=np.sum, + normalize=True, + margins=True, + ) + expected = DataFrame( + np.array([0] * 29 + [1], dtype=float).reshape(10, 3), + columns=Index(["bar", "foo", "All"], dtype="object", name="C"), + index=MultiIndex.from_tuples( + [ + ("one", "A"), + ("one", "B"), + ("one", "C"), + ("three", "A"), + ("three", "B"), + ("three", "C"), + ("two", "A"), + ("two", "B"), + ("two", "C"), + ("All", ""), + ], + names=["A", "B"], + ), + ) + tm.assert_frame_equal(result, expected) + + def test_margin_normalize(self): + # GH 27500 + df = DataFrame( + { + "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"], + "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"], + "C": [ + "small", + "large", + "large", + "small", + "small", + "large", + "small", + "small", + "large", + ], + "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], + "E": [2, 4, 5, 5, 6, 6, 8, 9, 9], + } + ) + # normalize on index + result = crosstab( + [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=0 + ) + expected = DataFrame( + [[0.5, 0.5], [0.5, 0.5], [0.666667, 0.333333], [0, 1], [0.444444, 0.555556]] + ) + expected.index = MultiIndex( + levels=[["Sub-Total", "bar", "foo"], ["", "one", "two"]], + codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]], + names=["A", "B"], + ) + expected.columns = Index(["large", "small"], dtype="object", name="C") + tm.assert_frame_equal(result, expected) + + # normalize on columns + result = crosstab( + [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=1 + ) + expected = DataFrame( + [ + [0.25, 0.2, 0.222222], + [0.25, 0.2, 0.222222], + [0.5, 0.2, 0.333333], + [0, 0.4, 0.222222], + ] + ) + expected.columns = Index( + ["large", "small", "Sub-Total"], dtype="object", name="C" + ) + expected.index = MultiIndex( + levels=[["bar", "foo"], ["one", "two"]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=["A", "B"], + ) + tm.assert_frame_equal(result, expected) + + # normalize on both index and column + result = crosstab( + [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=True + ) + expected = DataFrame( + [ + [0.111111, 0.111111, 0.222222], + [0.111111, 0.111111, 0.222222], + [0.222222, 0.111111, 0.333333], + [0.000000, 0.222222, 0.222222], + [0.444444, 0.555555, 1], + ] + ) + expected.columns = Index( + ["large", "small", "Sub-Total"], dtype="object", name="C" + ) + expected.index = MultiIndex( + levels=[["Sub-Total", "bar", "foo"], ["", "one", "two"]], + codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]], + names=["A", "B"], + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index e09a2a7907177..75c3c565e9d58 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -17,7 +17,7 @@ ) import pandas._testing as tm from pandas.api.types import CategoricalDtype as CDT -from pandas.core.reshape.pivot 
import crosstab, pivot_table +from pandas.core.reshape.pivot import pivot_table @pytest.fixture(params=[True, False]) @@ -2064,708 +2064,3 @@ def agg(l): ) with pytest.raises(KeyError, match="notpresent"): foo.pivot_table("notpresent", "X", "Y", aggfunc=agg) - - -class TestCrosstab: - def setup_method(self, method): - df = DataFrame( - { - "A": [ - "foo", - "foo", - "foo", - "foo", - "bar", - "bar", - "bar", - "bar", - "foo", - "foo", - "foo", - ], - "B": [ - "one", - "one", - "one", - "two", - "one", - "one", - "one", - "two", - "two", - "two", - "one", - ], - "C": [ - "dull", - "dull", - "shiny", - "dull", - "dull", - "shiny", - "shiny", - "dull", - "shiny", - "shiny", - "shiny", - ], - "D": np.random.randn(11), - "E": np.random.randn(11), - "F": np.random.randn(11), - } - ) - - self.df = df.append(df, ignore_index=True) - - def test_crosstab_single(self): - df = self.df - result = crosstab(df["A"], df["C"]) - expected = df.groupby(["A", "C"]).size().unstack() - tm.assert_frame_equal(result, expected.fillna(0).astype(np.int64)) - - def test_crosstab_multiple(self): - df = self.df - - result = crosstab(df["A"], [df["B"], df["C"]]) - expected = df.groupby(["A", "B", "C"]).size() - expected = expected.unstack("B").unstack("C").fillna(0).astype(np.int64) - tm.assert_frame_equal(result, expected) - - result = crosstab([df["B"], df["C"]], df["A"]) - expected = df.groupby(["B", "C", "A"]).size() - expected = expected.unstack("A").fillna(0).astype(np.int64) - tm.assert_frame_equal(result, expected) - - def test_crosstab_ndarray(self): - a = np.random.randint(0, 5, size=100) - b = np.random.randint(0, 3, size=100) - c = np.random.randint(0, 10, size=100) - - df = DataFrame({"a": a, "b": b, "c": c}) - - result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c")) - expected = crosstab(df["a"], [df["b"], df["c"]]) - tm.assert_frame_equal(result, expected) - - result = crosstab([b, c], a, colnames=["a"], rownames=("b", "c")) - expected = crosstab([df["b"], df["c"]], df["a"]) - tm.assert_frame_equal(result, expected) - - # assign arbitrary names - result = crosstab(self.df["A"].values, self.df["C"].values) - assert result.index.name == "row_0" - assert result.columns.name == "col_0" - - def test_crosstab_non_aligned(self): - # GH 17005 - a = pd.Series([0, 1, 1], index=["a", "b", "c"]) - b = pd.Series([3, 4, 3, 4, 3], index=["a", "b", "c", "d", "f"]) - c = np.array([3, 4, 3]) - - expected = pd.DataFrame( - [[1, 0], [1, 1]], - index=Index([0, 1], name="row_0"), - columns=Index([3, 4], name="col_0"), - ) - - result = crosstab(a, b) - tm.assert_frame_equal(result, expected) - - result = crosstab(a, c) - tm.assert_frame_equal(result, expected) - - def test_crosstab_margins(self): - a = np.random.randint(0, 7, size=100) - b = np.random.randint(0, 3, size=100) - c = np.random.randint(0, 5, size=100) - - df = DataFrame({"a": a, "b": b, "c": c}) - - result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c"), margins=True) - - assert result.index.names == ("a",) - assert result.columns.names == ["b", "c"] - - all_cols = result["All", ""] - exp_cols = df.groupby(["a"]).size().astype("i8") - # to keep index.name - exp_margin = Series([len(df)], index=Index(["All"], name="a")) - exp_cols = exp_cols.append(exp_margin) - exp_cols.name = ("All", "") - - tm.assert_series_equal(all_cols, exp_cols) - - all_rows = result.loc["All"] - exp_rows = df.groupby(["b", "c"]).size().astype("i8") - exp_rows = exp_rows.append(Series([len(df)], index=[("All", "")])) - exp_rows.name = "All" - - exp_rows = 
exp_rows.reindex(all_rows.index) - exp_rows = exp_rows.fillna(0).astype(np.int64) - tm.assert_series_equal(all_rows, exp_rows) - - def test_crosstab_margins_set_margin_name(self): - # GH 15972 - a = np.random.randint(0, 7, size=100) - b = np.random.randint(0, 3, size=100) - c = np.random.randint(0, 5, size=100) - - df = DataFrame({"a": a, "b": b, "c": c}) - - result = crosstab( - a, - [b, c], - rownames=["a"], - colnames=("b", "c"), - margins=True, - margins_name="TOTAL", - ) - - assert result.index.names == ("a",) - assert result.columns.names == ["b", "c"] - - all_cols = result["TOTAL", ""] - exp_cols = df.groupby(["a"]).size().astype("i8") - # to keep index.name - exp_margin = Series([len(df)], index=Index(["TOTAL"], name="a")) - exp_cols = exp_cols.append(exp_margin) - exp_cols.name = ("TOTAL", "") - - tm.assert_series_equal(all_cols, exp_cols) - - all_rows = result.loc["TOTAL"] - exp_rows = df.groupby(["b", "c"]).size().astype("i8") - exp_rows = exp_rows.append(Series([len(df)], index=[("TOTAL", "")])) - exp_rows.name = "TOTAL" - - exp_rows = exp_rows.reindex(all_rows.index) - exp_rows = exp_rows.fillna(0).astype(np.int64) - tm.assert_series_equal(all_rows, exp_rows) - - msg = "margins_name argument must be a string" - for margins_name in [666, None, ["a", "b"]]: - with pytest.raises(ValueError, match=msg): - crosstab( - a, - [b, c], - rownames=["a"], - colnames=("b", "c"), - margins=True, - margins_name=margins_name, - ) - - def test_crosstab_pass_values(self): - a = np.random.randint(0, 7, size=100) - b = np.random.randint(0, 3, size=100) - c = np.random.randint(0, 5, size=100) - values = np.random.randn(100) - - table = crosstab( - [a, b], c, values, aggfunc=np.sum, rownames=["foo", "bar"], colnames=["baz"] - ) - - df = DataFrame({"foo": a, "bar": b, "baz": c, "values": values}) - - expected = df.pivot_table( - "values", index=["foo", "bar"], columns="baz", aggfunc=np.sum - ) - tm.assert_frame_equal(table, expected) - - def test_crosstab_dropna(self): - # GH 3820 - a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object) - b = np.array(["one", "one", "two", "one", "two", "two", "two"], dtype=object) - c = np.array( - ["dull", "dull", "dull", "dull", "dull", "shiny", "shiny"], dtype=object - ) - res = pd.crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"], dropna=False) - m = MultiIndex.from_tuples( - [("one", "dull"), ("one", "shiny"), ("two", "dull"), ("two", "shiny")], - names=["b", "c"], - ) - tm.assert_index_equal(res.columns, m) - - def test_crosstab_no_overlap(self): - # GS 10291 - - s1 = pd.Series([1, 2, 3], index=[1, 2, 3]) - s2 = pd.Series([4, 5, 6], index=[4, 5, 6]) - - actual = crosstab(s1, s2) - expected = pd.DataFrame() - - tm.assert_frame_equal(actual, expected) - - def test_margin_dropna(self): - # GH 12577 - # pivot_table counts null into margin ('All') - # when margins=true and dropna=true - - df = pd.DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]}) - actual = pd.crosstab(df.a, df.b, margins=True, dropna=True) - expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [2, 3, 5]]) - expected.index = Index([1.0, 2.0, "All"], name="a") - expected.columns = Index([3, 4, "All"], name="b") - tm.assert_frame_equal(actual, expected) - - df = DataFrame( - {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]} - ) - actual = pd.crosstab(df.a, df.b, margins=True, dropna=True) - expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]]) - expected.index = Index([1.0, 2.0, "All"], name="a") - expected.columns = Index([3.0, 
4.0, "All"], name="b") - tm.assert_frame_equal(actual, expected) - - df = DataFrame( - {"a": [1, np.nan, np.nan, np.nan, np.nan, 2], "b": [3, 3, 4, 4, 4, 4]} - ) - actual = pd.crosstab(df.a, df.b, margins=True, dropna=True) - expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]]) - expected.index = Index([1.0, 2.0, "All"], name="a") - expected.columns = Index([3, 4, "All"], name="b") - tm.assert_frame_equal(actual, expected) - - # GH 12642 - # _add_margins raises KeyError: Level None not found - # when margins=True and dropna=False - df = pd.DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]}) - actual = pd.crosstab(df.a, df.b, margins=True, dropna=False) - expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]]) - expected.index = Index([1.0, 2.0, "All"], name="a") - expected.columns = Index([3, 4, "All"], name="b") - tm.assert_frame_equal(actual, expected) - - df = DataFrame( - {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]} - ) - actual = pd.crosstab(df.a, df.b, margins=True, dropna=False) - expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]]) - expected.index = Index([1.0, 2.0, "All"], name="a") - expected.columns = Index([3.0, 4.0, "All"], name="b") - tm.assert_frame_equal(actual, expected) - - a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object) - b = np.array(["one", "one", "two", "one", "two", np.nan, "two"], dtype=object) - c = np.array( - ["dull", "dull", "dull", "dull", "dull", "shiny", "shiny"], dtype=object - ) - - actual = pd.crosstab( - a, [b, c], rownames=["a"], colnames=["b", "c"], margins=True, dropna=False - ) - m = MultiIndex.from_arrays( - [ - ["one", "one", "two", "two", "All"], - ["dull", "shiny", "dull", "shiny", ""], - ], - names=["b", "c"], - ) - expected = DataFrame( - [[1, 0, 1, 0, 2], [2, 0, 1, 1, 5], [3, 0, 2, 1, 7]], columns=m - ) - expected.index = Index(["bar", "foo", "All"], name="a") - tm.assert_frame_equal(actual, expected) - - actual = pd.crosstab( - [a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=False - ) - m = MultiIndex.from_arrays( - [["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]], - names=["a", "b"], - ) - expected = DataFrame( - [[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 2, 7]], index=m - ) - expected.columns = Index(["dull", "shiny", "All"], name="c") - tm.assert_frame_equal(actual, expected) - - actual = pd.crosstab( - [a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=True - ) - m = MultiIndex.from_arrays( - [["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]], - names=["a", "b"], - ) - expected = DataFrame( - [[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 1, 6]], index=m - ) - expected.columns = Index(["dull", "shiny", "All"], name="c") - tm.assert_frame_equal(actual, expected) - - def test_crosstab_normalize(self): - # Issue 12578 - df = pd.DataFrame( - {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]} - ) - - rindex = pd.Index([1, 2], name="a") - cindex = pd.Index([3, 4], name="b") - full_normal = pd.DataFrame([[0.2, 0], [0.2, 0.6]], index=rindex, columns=cindex) - row_normal = pd.DataFrame( - [[1.0, 0], [0.25, 0.75]], index=rindex, columns=cindex - ) - col_normal = pd.DataFrame([[0.5, 0], [0.5, 1.0]], index=rindex, columns=cindex) - - # Check all normalize args - tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize="all"), full_normal) - tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize=True), full_normal) - 
tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize="index"), row_normal) - tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize="columns"), col_normal) - tm.assert_frame_equal( - pd.crosstab(df.a, df.b, normalize=1), - pd.crosstab(df.a, df.b, normalize="columns"), - ) - tm.assert_frame_equal( - pd.crosstab(df.a, df.b, normalize=0), - pd.crosstab(df.a, df.b, normalize="index"), - ) - - row_normal_margins = pd.DataFrame( - [[1.0, 0], [0.25, 0.75], [0.4, 0.6]], - index=pd.Index([1, 2, "All"], name="a", dtype="object"), - columns=pd.Index([3, 4], name="b", dtype="object"), - ) - col_normal_margins = pd.DataFrame( - [[0.5, 0, 0.2], [0.5, 1.0, 0.8]], - index=pd.Index([1, 2], name="a", dtype="object"), - columns=pd.Index([3, 4, "All"], name="b", dtype="object"), - ) - - all_normal_margins = pd.DataFrame( - [[0.2, 0, 0.2], [0.2, 0.6, 0.8], [0.4, 0.6, 1]], - index=pd.Index([1, 2, "All"], name="a", dtype="object"), - columns=pd.Index([3, 4, "All"], name="b", dtype="object"), - ) - tm.assert_frame_equal( - pd.crosstab(df.a, df.b, normalize="index", margins=True), row_normal_margins - ) - tm.assert_frame_equal( - pd.crosstab(df.a, df.b, normalize="columns", margins=True), - col_normal_margins, - ) - tm.assert_frame_equal( - pd.crosstab(df.a, df.b, normalize=True, margins=True), all_normal_margins - ) - - # Test arrays - pd.crosstab( - [np.array([1, 1, 2, 2]), np.array([1, 2, 1, 2])], np.array([1, 2, 1, 2]) - ) - - # Test with aggfunc - norm_counts = pd.DataFrame( - [[0.25, 0, 0.25], [0.25, 0.5, 0.75], [0.5, 0.5, 1]], - index=pd.Index([1, 2, "All"], name="a", dtype="object"), - columns=pd.Index([3, 4, "All"], name="b"), - ) - test_case = pd.crosstab( - df.a, df.b, df.c, aggfunc="count", normalize="all", margins=True - ) - tm.assert_frame_equal(test_case, norm_counts) - - df = pd.DataFrame( - {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [0, 4, np.nan, 3, 3]} - ) - - norm_sum = pd.DataFrame( - [[0, 0, 0.0], [0.4, 0.6, 1], [0.4, 0.6, 1]], - index=pd.Index([1, 2, "All"], name="a", dtype="object"), - columns=pd.Index([3, 4, "All"], name="b", dtype="object"), - ) - test_case = pd.crosstab( - df.a, df.b, df.c, aggfunc=np.sum, normalize="all", margins=True - ) - tm.assert_frame_equal(test_case, norm_sum) - - def test_crosstab_with_empties(self): - # Check handling of empties - df = pd.DataFrame( - { - "a": [1, 2, 2, 2, 2], - "b": [3, 3, 4, 4, 4], - "c": [np.nan, np.nan, np.nan, np.nan, np.nan], - } - ) - - empty = pd.DataFrame( - [[0.0, 0.0], [0.0, 0.0]], - index=pd.Index([1, 2], name="a", dtype="int64"), - columns=pd.Index([3, 4], name="b"), - ) - - for i in [True, "index", "columns"]: - calculated = pd.crosstab( - df.a, df.b, values=df.c, aggfunc="count", normalize=i - ) - tm.assert_frame_equal(empty, calculated) - - nans = pd.DataFrame( - [[0.0, np.nan], [0.0, 0.0]], - index=pd.Index([1, 2], name="a", dtype="int64"), - columns=pd.Index([3, 4], name="b"), - ) - - calculated = pd.crosstab( - df.a, df.b, values=df.c, aggfunc="count", normalize=False - ) - tm.assert_frame_equal(nans, calculated) - - def test_crosstab_errors(self): - # Issue 12578 - - df = pd.DataFrame( - {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]} - ) - - error = "values cannot be used without an aggfunc." 
- with pytest.raises(ValueError, match=error): - pd.crosstab(df.a, df.b, values=df.c) - - error = "aggfunc cannot be used without values" - with pytest.raises(ValueError, match=error): - pd.crosstab(df.a, df.b, aggfunc=np.mean) - - error = "Not a valid normalize argument" - with pytest.raises(ValueError, match=error): - pd.crosstab(df.a, df.b, normalize="42") - - with pytest.raises(ValueError, match=error): - pd.crosstab(df.a, df.b, normalize=42) - - error = "Not a valid margins argument" - with pytest.raises(ValueError, match=error): - pd.crosstab(df.a, df.b, normalize="all", margins=42) - - def test_crosstab_with_categorial_columns(self): - # GH 8860 - df = pd.DataFrame( - { - "MAKE": ["Honda", "Acura", "Tesla", "Honda", "Honda", "Acura"], - "MODEL": ["Sedan", "Sedan", "Electric", "Pickup", "Sedan", "Sedan"], - } - ) - categories = ["Sedan", "Electric", "Pickup"] - df["MODEL"] = df["MODEL"].astype("category").cat.set_categories(categories) - result = pd.crosstab(df["MAKE"], df["MODEL"]) - - expected_index = pd.Index(["Acura", "Honda", "Tesla"], name="MAKE") - expected_columns = pd.CategoricalIndex( - categories, categories=categories, ordered=False, name="MODEL" - ) - expected_data = [[2, 0, 0], [2, 0, 1], [0, 1, 0]] - expected = pd.DataFrame( - expected_data, index=expected_index, columns=expected_columns - ) - tm.assert_frame_equal(result, expected) - - def test_crosstab_with_numpy_size(self): - # GH 4003 - df = pd.DataFrame( - { - "A": ["one", "one", "two", "three"] * 6, - "B": ["A", "B", "C"] * 8, - "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4, - "D": np.random.randn(24), - "E": np.random.randn(24), - } - ) - result = pd.crosstab( - index=[df["A"], df["B"]], - columns=[df["C"]], - margins=True, - aggfunc=np.size, - values=df["D"], - ) - expected_index = pd.MultiIndex( - levels=[["All", "one", "three", "two"], ["", "A", "B", "C"]], - codes=[[1, 1, 1, 2, 2, 2, 3, 3, 3, 0], [1, 2, 3, 1, 2, 3, 1, 2, 3, 0]], - names=["A", "B"], - ) - expected_column = pd.Index(["bar", "foo", "All"], dtype="object", name="C") - expected_data = np.array( - [ - [2.0, 2.0, 4.0], - [2.0, 2.0, 4.0], - [2.0, 2.0, 4.0], - [2.0, np.nan, 2.0], - [np.nan, 2.0, 2.0], - [2.0, np.nan, 2.0], - [np.nan, 2.0, 2.0], - [2.0, np.nan, 2.0], - [np.nan, 2.0, 2.0], - [12.0, 12.0, 24.0], - ] - ) - expected = pd.DataFrame( - expected_data, index=expected_index, columns=expected_column - ) - tm.assert_frame_equal(result, expected) - - def test_crosstab_dup_index_names(self): - # GH 13279 - s = pd.Series(range(3), name="foo") - - result = pd.crosstab(s, s) - expected_index = pd.Index(range(3), name="foo") - expected = pd.DataFrame( - np.eye(3, dtype=np.int64), index=expected_index, columns=expected_index - ) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("names", [["a", ("b", "c")], [("a", "b"), "c"]]) - def test_crosstab_tuple_name(self, names): - s1 = pd.Series(range(3), name=names[0]) - s2 = pd.Series(range(1, 4), name=names[1]) - - mi = pd.MultiIndex.from_arrays([range(3), range(1, 4)], names=names) - expected = pd.Series(1, index=mi).unstack(1, fill_value=0) - - result = pd.crosstab(s1, s2) - tm.assert_frame_equal(result, expected) - - def test_crosstab_both_tuple_names(self): - # GH 18321 - s1 = pd.Series(range(3), name=("a", "b")) - s2 = pd.Series(range(3), name=("c", "d")) - - expected = pd.DataFrame( - np.eye(3, dtype="int64"), - index=pd.Index(range(3), name=("a", "b")), - columns=pd.Index(range(3), name=("c", "d")), - ) - result = crosstab(s1, s2) - tm.assert_frame_equal(result, expected) - 
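# For readers skimming this relocation diff, a minimal sketch of the crosstab
# behavior the tests above and below exercise; the data here is illustrative
# only and is not taken from the suite:
import pandas as pd

a = pd.Series(["x", "x", "y", "y"])
b = pd.Series([1, 2, 1, 1])
print(pd.crosstab(a, b))                     # plain co-occurrence counts
print(pd.crosstab(a, b, normalize="index"))  # each row divided by its row total
print(pd.crosstab(a, b, margins=True))       # appends an "All" row and column of totals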
- def test_crosstab_unsorted_order(self): - df = pd.DataFrame({"b": [3, 1, 2], "a": [5, 4, 6]}, index=["C", "A", "B"]) - result = pd.crosstab(df.index, [df.b, df.a]) - e_idx = pd.Index(["A", "B", "C"], name="row_0") - e_columns = pd.MultiIndex.from_tuples( - [(1, 4), (2, 6), (3, 5)], names=["b", "a"] - ) - expected = pd.DataFrame( - [[1, 0, 0], [0, 1, 0], [0, 0, 1]], index=e_idx, columns=e_columns - ) - tm.assert_frame_equal(result, expected) - - def test_crosstab_normalize_multiple_columns(self): - # GH 15150 - df = pd.DataFrame( - { - "A": ["one", "one", "two", "three"] * 6, - "B": ["A", "B", "C"] * 8, - "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4, - "D": [0] * 24, - "E": [0] * 24, - } - ) - result = pd.crosstab( - [df.A, df.B], - df.C, - values=df.D, - aggfunc=np.sum, - normalize=True, - margins=True, - ) - expected = pd.DataFrame( - np.array([0] * 29 + [1], dtype=float).reshape(10, 3), - columns=Index(["bar", "foo", "All"], dtype="object", name="C"), - index=MultiIndex.from_tuples( - [ - ("one", "A"), - ("one", "B"), - ("one", "C"), - ("three", "A"), - ("three", "B"), - ("three", "C"), - ("two", "A"), - ("two", "B"), - ("two", "C"), - ("All", ""), - ], - names=["A", "B"], - ), - ) - tm.assert_frame_equal(result, expected) - - def test_margin_normalize(self): - # GH 27500 - df = pd.DataFrame( - { - "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"], - "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"], - "C": [ - "small", - "large", - "large", - "small", - "small", - "large", - "small", - "small", - "large", - ], - "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], - "E": [2, 4, 5, 5, 6, 6, 8, 9, 9], - } - ) - # normalize on index - result = pd.crosstab( - [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=0 - ) - expected = pd.DataFrame( - [[0.5, 0.5], [0.5, 0.5], [0.666667, 0.333333], [0, 1], [0.444444, 0.555556]] - ) - expected.index = MultiIndex( - levels=[["Sub-Total", "bar", "foo"], ["", "one", "two"]], - codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]], - names=["A", "B"], - ) - expected.columns = Index(["large", "small"], dtype="object", name="C") - tm.assert_frame_equal(result, expected) - - # normalize on columns - result = pd.crosstab( - [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=1 - ) - expected = pd.DataFrame( - [ - [0.25, 0.2, 0.222222], - [0.25, 0.2, 0.222222], - [0.5, 0.2, 0.333333], - [0, 0.4, 0.222222], - ] - ) - expected.columns = Index( - ["large", "small", "Sub-Total"], dtype="object", name="C" - ) - expected.index = MultiIndex( - levels=[["bar", "foo"], ["one", "two"]], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=["A", "B"], - ) - tm.assert_frame_equal(result, expected) - - # normalize on both index and column - result = pd.crosstab( - [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=True - ) - expected = pd.DataFrame( - [ - [0.111111, 0.111111, 0.222222], - [0.111111, 0.111111, 0.222222], - [0.222222, 0.111111, 0.333333], - [0.000000, 0.222222, 0.222222], - [0.444444, 0.555555, 1], - ] - ) - expected.columns = Index( - ["large", "small", "Sub-Total"], dtype="object", name="C" - ) - expected.index = MultiIndex( - levels=[["Sub-Total", "bar", "foo"], ["", "one", "two"]], - codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]], - names=["A", "B"], - ) - tm.assert_frame_equal(result, expected) From ae79bb23c7d10bd5b307f542c1a1c78d1600aecb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 9 Mar 2020 14:27:56 -0700 Subject: [PATCH 281/388] CLN: remove Categorical.put (#32554) --- 
pandas/core/arrays/categorical.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 92859479ec73f..ba4c2e168e0c4 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1409,12 +1409,6 @@ def notna(self): notnull = notna - def put(self, *args, **kwargs): - """ - Replace specific elements in the Categorical with given values. - """ - raise NotImplementedError(("'put' is not yet implemented for Categorical")) - def dropna(self): """ Return the Categorical without null values.
From 76a1710c70e42ba03c65fbc1ffdfd718981848f3 Mon Sep 17 00:00:00 2001 From: Stijn Van Hoey Date: Tue, 10 Mar 2020 08:18:28 +0100 Subject: [PATCH 282/388] DOC: Add missing question mark icon (#32564) --- doc/source/_static/question_mark_noback.svg | 72 +++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 doc/source/_static/question_mark_noback.svg diff --git a/doc/source/_static/question_mark_noback.svg b/doc/source/_static/question_mark_noback.svg new file mode 100644 index 0000000000000..3abb4b806d20a --- /dev/null +++ b/doc/source/_static/question_mark_noback.svg @@ -0,0 +1,72 @@ + [SVG markup elided: 72-line vector question-mark ("?") icon]
From 3d591cb040d30f950778aacff845606917160529 Mon Sep 17 00:00:00 2001 From: Rushabh Vasani Date: Tue, 10 Mar 2020 14:05:47 +0530 Subject: [PATCH 283/388] DOC: add redirects from Rolling to rolling.Rolling (#31875) Co-authored-by: Joris Van den Bossche --- doc/redirects.csv | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/doc/redirects.csv b/doc/redirects.csv index ef93955c14fe6..3669ff4b7cc0b 100644 --- a/doc/redirects.csv +++ b/doc/redirects.csv @@ -49,7 +49,25 @@ internals,development/internals # api moved function reference/api/pandas.io.json.json_normalize,pandas.json_normalize -# api rename +# rename due to refactors +reference/api/pandas.core.window.Rolling,pandas.core.window.rolling.Rolling +reference/api/pandas.core.window.Rolling.aggregate,pandas.core.window.rolling.Rolling.aggregate +reference/api/pandas.core.window.Rolling.apply,pandas.core.window.rolling.Rolling.apply +reference/api/pandas.core.window.Rolling.corr,pandas.core.window.rolling.Rolling.corr +reference/api/pandas.core.window.Rolling.count,pandas.core.window.rolling.Rolling.count +reference/api/pandas.core.window.Rolling.cov,pandas.core.window.rolling.Rolling.cov +reference/api/pandas.core.window.Rolling.kurt,pandas.core.window.rolling.Rolling.kurt +reference/api/pandas.core.window.Rolling.max,pandas.core.window.rolling.Rolling.max +reference/api/pandas.core.window.Rolling.mean,pandas.core.window.rolling.Rolling.mean +reference/api/pandas.core.window.Rolling.median,pandas.core.window.rolling.Rolling.median +reference/api/pandas.core.window.Rolling.min,pandas.core.window.rolling.Rolling.min +reference/api/pandas.core.window.Rolling.quantile,pandas.core.window.rolling.Rolling.quantile +reference/api/pandas.core.window.Rolling.skew,pandas.core.window.rolling.Rolling.skew +reference/api/pandas.core.window.Rolling.std,pandas.core.window.rolling.Rolling.std +reference/api/pandas.core.window.Rolling.sum,pandas.core.window.rolling.Rolling.sum +reference/api/pandas.core.window.Rolling.var,pandas.core.window.rolling.Rolling.var + +# api url change (generated -> reference/api rename) api,reference/index generated/pandas.api.extensions.ExtensionArray.argsort,../reference/api/pandas.api.extensions.ExtensionArray.argsort
generated/pandas.api.extensions.ExtensionArray.astype,../reference/api/pandas.api.extensions.ExtensionArray.astype From c143f0871aa2fc24e1e57a1adab58b43649b7ea0 Mon Sep 17 00:00:00 2001 From: andresmcneill <33670824+andresmcneill@users.noreply.github.com> Date: Tue, 10 Mar 2020 06:37:29 -0400 Subject: [PATCH 284/388] TST: Fix bare pytest raises in generic/test_frame.py (#32565) --- pandas/tests/generic/test_frame.py | 31 ++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py index dca65152e82db..8fe49b2ec2299 100644 --- a/pandas/tests/generic/test_frame.py +++ b/pandas/tests/generic/test_frame.py @@ -72,9 +72,10 @@ def test_nonzero_single_element(self): assert not df.bool() df = DataFrame([[False, False]]) - with pytest.raises(ValueError): + msg = "The truth value of a DataFrame is ambiguous" + with pytest.raises(ValueError, match=msg): df.bool() - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): bool(df) def test_get_numeric_data_preserve_dtype(self): @@ -189,30 +190,31 @@ class TestDataFrame2: def test_validate_bool_args(self, value): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - with pytest.raises(ValueError): + msg = 'For argument "inplace" expected type bool, received type' + with pytest.raises(ValueError, match=msg): super(DataFrame, df).rename_axis( mapper={"a": "x", "b": "y"}, axis=1, inplace=value ) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): super(DataFrame, df).drop("a", axis=1, inplace=value) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): super(DataFrame, df)._consolidate(inplace=value) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): super(DataFrame, df).fillna(value=0, inplace=value) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): super(DataFrame, df).replace(to_replace=1, value=7, inplace=value) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): super(DataFrame, df).interpolate(inplace=value) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): super(DataFrame, df)._where(cond=df.a > 2, inplace=value) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg): super(DataFrame, df).mask(cond=df.a > 2, inplace=value) def test_unexpected_keyword(self): @@ -222,16 +224,17 @@ def test_unexpected_keyword(self): ts = df["joe"].copy() ts[2] = np.nan - with pytest.raises(TypeError, match="unexpected keyword"): + msg = "unexpected keyword" + with pytest.raises(TypeError, match=msg): df.drop("joe", axis=1, in_place=True) - with pytest.raises(TypeError, match="unexpected keyword"): + with pytest.raises(TypeError, match=msg): df.reindex([1, 0], inplace=True) - with pytest.raises(TypeError, match="unexpected keyword"): + with pytest.raises(TypeError, match=msg): ca.fillna(0, inplace=True) - with pytest.raises(TypeError, match="unexpected keyword"): + with pytest.raises(TypeError, match=msg): ts.fillna(0, in_place=True) From dc4de5847dea46e0d4350240034359dfb6bb9ac3 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 10 Mar 2020 11:09:20 +0000 Subject: [PATCH 285/388] TST: add test.indexes.common.Base.create_index and annotate .create_index (#32567) --- pandas/tests/indexes/common.py | 3 +++ pandas/tests/indexes/datetimes/test_datetimelike.py | 2 +- pandas/tests/indexes/period/test_period.py | 2 +- pandas/tests/indexes/ranges/test_range.py | 2 +- 
pandas/tests/indexes/test_base.py | 4 ++-- pandas/tests/indexes/test_numeric.py | 6 +++--- pandas/tests/indexes/timedeltas/test_timedelta.py | 2 +- 7 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 69451501fd7bd..e5af0d9c03979 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -35,6 +35,9 @@ class Base: _holder: Optional[Type[Index]] = None _compat_props = ["shape", "ndim", "size", "nbytes"] + def create_index(self) -> Index: + raise NotImplementedError("Method not implemented") + def test_pickle_compat_construction(self): # need an object to create with msg = ( diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py index da1bd6f091d1a..e4785e5f80256 100644 --- a/pandas/tests/indexes/datetimes/test_datetimelike.py +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -17,7 +17,7 @@ class TestDatetimeIndex(DatetimeLike): def indices(self, request): return request.param - def create_index(self): + def create_index(self) -> DatetimeIndex: return date_range("20130101", periods=5) def test_shift(self): diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index b4c223be0f6a5..03f0be3f368cb 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -35,7 +35,7 @@ class TestPeriodIndex(DatetimeLike): def indices(self, request): return request.param - def create_index(self): + def create_index(self) -> PeriodIndex: return period_range("20130101", periods=5, freq="D") def test_pickle_compat_construction(self): diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index c1cc23039eeaf..61ac937f5fda0 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -30,7 +30,7 @@ class TestRangeIndex(Numeric): def indices(self, request): return request.param - def create_index(self): + def create_index(self) -> RangeIndex: return RangeIndex(start=0, stop=20, step=2) def test_can_hold_identifiers(self): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 9e0cef375fea3..2981f56e58ecc 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -59,7 +59,7 @@ def index(self, request): # copy to avoid mutation, e.g. 
setting .name return indices_dict[key].copy() - def create_index(self): + def create_index(self) -> Index: return Index(list("abcde")) def test_can_hold_identifiers(self): @@ -2277,7 +2277,7 @@ class TestMixedIntIndex(Base): def indices(self, request): return Index(request.param) - def create_index(self): + def create_index(self) -> Index: return Index([0, "a", 1, "b", 2, "c"]) def test_argsort(self): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 10d57d8616cf3..23877c2c7607a 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -118,7 +118,7 @@ def mixed_index(self): def float_index(self): return Float64Index([0.0, 2.5, 5.0, 7.5, 10.0]) - def create_index(self): + def create_index(self) -> Float64Index: return Float64Index(np.arange(5, dtype="float64")) def test_repr_roundtrip(self, indices): @@ -663,7 +663,7 @@ class TestInt64Index(NumericInt): def indices(self, request): return Int64Index(request.param) - def create_index(self): + def create_index(self) -> Int64Index: # return Int64Index(np.arange(5, dtype="int64")) return Int64Index(range(0, 20, 2)) @@ -801,7 +801,7 @@ def index_large(self): large = [2 ** 63, 2 ** 63 + 10, 2 ** 63 + 15, 2 ** 63 + 20, 2 ** 63 + 25] return UInt64Index(large) - def create_index(self): + def create_index(self) -> UInt64Index: # compat with shared Int64/Float64 tests; use index_large for UInt64 only tests return UInt64Index(np.arange(5, dtype="uint64")) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index d4a94f8693081..971203d6fc720 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -28,7 +28,7 @@ class TestTimedeltaIndex(DatetimeLike): def indices(self): return tm.makeTimedeltaIndex(10) - def create_index(self): + def create_index(self) -> TimedeltaIndex: return pd.to_timedelta(range(5), unit="d") + pd.offsets.Hour(1) def test_numeric_compat(self): From 5000420987564902e34252d1308f08751bb65b6d Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Tue, 10 Mar 2020 12:17:12 +0100 Subject: [PATCH 286/388] Fix warning in unit test (#32563) --- pandas/tests/io/excel/test_openpyxl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 10ed192062d9c..60c943d95e510 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -114,7 +114,7 @@ def test_to_excel_with_openpyxl_engine(ext, tmpdir): df2 = DataFrame({"B": np.linspace(1, 20, 10)}) df = pd.concat([df1, df2], axis=1) styled = df.style.applymap( - lambda val: "color: %s" % "red" if val < 0 else "black" + lambda val: "color: %s" % ("red" if val < 0 else "black") ).highlight_max() filename = tmpdir / "styled.xlsx" From 00bb09d9e8701abc687b9e188c7a8f6d9cce3e01 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 04:24:33 -0700 Subject: [PATCH 287/388] CLN: remove unused in pd._testing (#32534) --- pandas/_testing.py | 93 ++-------------------------------------------- 1 file changed, 3 insertions(+), 90 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index 5e94ac3b3d108..0c79a9425b8f6 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -32,7 +32,6 @@ is_datetime64tz_dtype, is_extension_array_dtype, is_interval_dtype, - is_list_like, is_number, is_numeric_dtype, is_period_dtype, @@ -418,10 +417,7 @@ def 
rands_array(nchars, size, dtype="O"): .view((np.str_, nchars)) .reshape(size) ) - if dtype is None: - return retval - else: - return retval.astype(dtype) + return retval.astype(dtype) def randu_array(nchars, size, dtype="O"): @@ -433,10 +429,7 @@ def randu_array(nchars, size, dtype="O"): .view((np.unicode_, nchars)) .reshape(size) ) - if dtype is None: - return retval - else: - return retval.astype(dtype) + return retval.astype(dtype) def rands(nchars): @@ -449,16 +442,6 @@ def rands(nchars): return "".join(np.random.choice(RANDS_CHARS, nchars)) -def randu(nchars): - """ - Generate one random unicode string. - - See `randu_array` if you want to create an array of random unicode strings. - - """ - return "".join(np.random.choice(RANDU_CHARS, nchars)) - - def close(fignum=None): from matplotlib.pyplot import get_fignums, close as _close @@ -725,10 +708,7 @@ def repr_class(x): # return Index as it is to include values in the error message return x - try: - return type(x).__name__ - except AttributeError: - return repr(type(x)) + return type(x).__name__ if exact == "equiv": if type(left) != type(right): @@ -2093,53 +2073,6 @@ def _gen_unique_rand(rng, _extra_size): return i.tolist(), j.tolist() -def makeMissingCustomDataframe( - nrows, - ncols, - density=0.9, - random_state=None, - c_idx_names=True, - r_idx_names=True, - c_idx_nlevels=1, - r_idx_nlevels=1, - data_gen_f=None, - c_ndupe_l=None, - r_ndupe_l=None, - dtype=None, - c_idx_type=None, - r_idx_type=None, -): - """ - Parameters - ---------- - Density : float, optional - Float in (0, 1) that gives the percentage of non-missing numbers in - the DataFrame. - random_state : {np.random.RandomState, int}, optional - Random number generator or random seed. - - See makeCustomDataframe for descriptions of the rest of the parameters. - """ - df = makeCustomDataframe( - nrows, - ncols, - c_idx_names=c_idx_names, - r_idx_names=r_idx_names, - c_idx_nlevels=c_idx_nlevels, - r_idx_nlevels=r_idx_nlevels, - data_gen_f=data_gen_f, - c_ndupe_l=c_ndupe_l, - r_ndupe_l=r_ndupe_l, - dtype=dtype, - c_idx_type=c_idx_type, - r_idx_type=r_idx_type, - ) - - i, j = _create_missing_idx(nrows, ncols, density, random_state) - df.values[i, j] = np.nan - return df - - def makeMissingDataframe(density=0.9, random_state=None): df = makeDataFrame() i, j = _create_missing_idx(*df.shape, density=density, random_state=random_state) @@ -2387,7 +2320,6 @@ def wrapper(*args, **kwargs): def assert_produces_warning( expected_warning=Warning, filter_level="always", - clear=None, check_stacklevel=True, raise_on_extra_warnings=True, ): @@ -2417,12 +2349,6 @@ class for all warnings. To check that no warning is returned, from each module * "once" - print the warning the first time it is generated - clear : str, default None - If not ``None`` then remove any previously raised warnings from - the ``__warningsregistry__`` to ensure that no warning messages are - suppressed by this context manager. If ``None`` is specified, - the ``__warningsregistry__`` keeps track of which warnings have been - shown, and does not show them again. check_stacklevel : bool, default True If True, displays the line that called the function containing the warning to show were the function is called. Otherwise, the @@ -2455,19 +2381,6 @@ class for all warnings. 
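# Context for the deletion below: ``assert_produces_warning`` is the context
# manager pandas tests use to assert that a warning fires. A hedged usage
# sketch, illustrative and not part of this patch:
import warnings
import pandas._testing as tm

with tm.assert_produces_warning(FutureWarning):
    warnings.warn("this API is deprecated", FutureWarning)
# The ``clear`` argument removed below used to purge module
# ``__warningregistry__`` state before the check; this commit drops it as unused.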
To check that no warning is returned, with warnings.catch_warnings(record=True) as w: - if clear is not None: - # make sure that we are clearing these warnings - # if they have happened before - # to guarantee that we will catch them - if not is_list_like(clear): - clear = [clear] - for m in clear: - try: - m.__warningregistry__.clear() - except AttributeError: - # module may not have __warningregistry__ - pass - saw_warning = False warnings.simplefilter(filter_level) yield w From 5b45f4b18a9171ec6351897710e140424254e601 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 04:43:37 -0700 Subject: [PATCH 288/388] TST: remove unused kwargs in assert_sp_array_equal (#32525) --- pandas/_testing.py | 44 +++++++------------------------------------- 1 file changed, 7 insertions(+), 37 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index 0c79a9425b8f6..62a6dacac3b0c 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -1461,14 +1461,7 @@ def to_array(obj): # Sparse -def assert_sp_array_equal( - left, - right, - check_dtype=True, - check_kind=True, - check_fill_value=True, - consolidate_block_indices=False, -): +def assert_sp_array_equal(left, right): """ Check that the left and right SparseArray are equal. @@ -1476,38 +1469,17 @@ def assert_sp_array_equal( ---------- left : SparseArray right : SparseArray - check_dtype : bool, default True - Whether to check the data dtype is identical. - check_kind : bool, default True - Whether to just the kind of the sparse index for each column. - check_fill_value : bool, default True - Whether to check that left.fill_value matches right.fill_value - consolidate_block_indices : bool, default False - Whether to consolidate contiguous blocks for sparse arrays with - a BlockIndex. Some operations, e.g. concat, will end up with - block indices that could be consolidated. Setting this to true will - create a new BlockIndex for that array, with consolidated - block indices. """ _check_isinstance(left, right, pd.arrays.SparseArray) - assert_numpy_array_equal(left.sp_values, right.sp_values, check_dtype=check_dtype) + assert_numpy_array_equal(left.sp_values, right.sp_values) # SparseIndex comparison assert isinstance(left.sp_index, pd._libs.sparse.SparseIndex) assert isinstance(right.sp_index, pd._libs.sparse.SparseIndex) - if not check_kind: - left_index = left.sp_index.to_block_index() - right_index = right.sp_index.to_block_index() - else: - left_index = left.sp_index - right_index = right.sp_index - - if consolidate_block_indices and left.kind == "block": - # we'll probably remove this hack... 
- left_index = left_index.to_int_index().to_block_index() - right_index = right_index.to_int_index().to_block_index() + left_index = left.sp_index + right_index = right.sp_index if not left_index.equals(right_index): raise_assert_detail( @@ -1517,11 +1489,9 @@ def assert_sp_array_equal( # Just ensure a pass - if check_fill_value: - assert_attr_equal("fill_value", left, right) - if check_dtype: - assert_attr_equal("dtype", left, right) - assert_numpy_array_equal(left.to_dense(), right.to_dense(), check_dtype=check_dtype) + assert_attr_equal("fill_value", left, right) + assert_attr_equal("dtype", left, right) + assert_numpy_array_equal(left.to_dense(), right.to_dense()) # ----------------------------------------------------------------------------- From db9a50c1c5c028b4aa1df31add90aa89b2dfcafb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 04:48:32 -0700 Subject: [PATCH 289/388] CLN: unused code in reshape.merge (#32509) --- pandas/core/reshape/merge.py | 49 ++++++++---------------------------- 1 file changed, 10 insertions(+), 39 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index c301d6e7c7155..08c04cb49f125 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -92,9 +92,7 @@ def merge( merge.__doc__ = _merge_doc % "\nleft : DataFrame" -def _groupby_and_merge( - by, on, left, right: "DataFrame", _merge_pieces, check_duplicates: bool = True -): +def _groupby_and_merge(by, on, left: "DataFrame", right: "DataFrame", merge_pieces): """ groupby & merge; we are always performing a left-by type operation @@ -102,11 +100,9 @@ def _groupby_and_merge( ---------- by: field to group on: duplicates field - left: left frame - right: right frame - _merge_pieces: function for merging - check_duplicates: bool, default True - should we check & clean duplicates + left: DataFrame + right: DataFrame + merge_pieces: function for merging """ pieces = [] if not isinstance(by, (list, tuple)): @@ -118,18 +114,6 @@ def _groupby_and_merge( # if we can groupby the rhs # then we can get vastly better perf - # we will check & remove duplicates if indicated - if check_duplicates: - if on is None: - on = [] - elif not isinstance(on, (list, tuple)): - on = [on] - - if right.duplicated(by + on).any(): - _right = right.drop_duplicates(by + on, keep="last") - # TODO: use overload to refine return type of drop_duplicates - assert _right is not None # needed for mypy - right = _right try: rby = right.groupby(by, sort=False) except KeyError: @@ -151,16 +135,13 @@ def _groupby_and_merge( pieces.append(merged) continue - merged = _merge_pieces(lhs, rhs) + merged = merge_pieces(lhs, rhs) # make sure join keys are in the merged - # TODO, should _merge_pieces do this? + # TODO, should merge_pieces do this? 
for k in by: - try: - if k in merged: - merged[k] = key - except KeyError: - pass + if k in merged: + merged[k] = key pieces.append(merged)
From 113c2559ff10df03d9d8803ac84455904d408e9b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 06:31:25 -0700 Subject: [PATCH 290/388] CLN: remove unnecessary to_dense (#32533) --- pandas/io/formats/format.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 2a528781f8c93..c879eaeda64e0 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1352,8 +1352,6 @@ def format_values_with(float_format): values = self.values is_complex = is_complex_dtype(values) mask = isna(values) - if hasattr(values, "to_dense"): # sparse numpy ndarray - values = values.to_dense() values = np.array(values, dtype="object") values[mask] = na_rep imask = (~mask).ravel()
From 37659d47a685ecc5f5117aa56526ece0106c6d0f Mon Sep 17 00:00:00 2001 From: Prakhar Pandey Date: Tue, 10 Mar 2020 19:38:00 +0530 Subject: [PATCH 291/388] BUG: string methods with NA (#31684) --- doc/source/whatsnew/v1.0.2.rst | 4 ++++ pandas/core/strings.py | 2 ++ pandas/tests/test_strings.py | 12 ++++++++++++ 3 files changed, 18 insertions(+) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index eec471f989037..462f243f14494 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -85,6 +85,10 @@ Bug fixes - Fixed bug where :meth:`GroupBy.first` and :meth:`GroupBy.last` would raise a ``TypeError`` when groups contained ``pd.NA`` in a column of object dtype (:issue:`32123`) - Fix bug in :meth:`Series.convert_dtypes` for series with mix of integers and strings (:issue:`32117`) +**Strings** + +- Using ``pd.NA`` with :meth:`Series.str.repeat` now correctly outputs a null value instead of raising an error for vector inputs (:issue:`31632`) + .. ---------------------------------------------------------------------------
.. _whatsnew_102.contributors: diff --git a/pandas/core/strings.py b/pandas/core/strings.py index c4bdbaff1649c..71d9e8e7a577c 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -775,6 +775,8 @@ def scalar_rep(x): else: def rep(x, r): + if x is libmissing.NA: + return x try: return bytes.__mul__(x, r) except TypeError: diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 1338d801e39f4..6abf174aa7fd2 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1157,6 +1157,18 @@ def test_repeat(self): assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) + def test_repeat_with_null(self): + # GH: 31632 + values = Series(["a", None], dtype="string") + result = values.str.repeat([3, 4]) + exp = Series(["aaa", None], dtype="string") + tm.assert_series_equal(result, exp) + + values = Series(["a", "b"], dtype="string") + result = values.str.repeat([3, None]) + exp = Series(["aaa", None], dtype="string") + tm.assert_series_equal(result, exp) + def test_match(self): # New match behavior introduced in 0.13 values = Series(["fooBAD__barBAD", np.nan, "foo"])
From a77ad8bf04da4bb4e5a30fc375b4b59f2d0860ab Mon Sep 17 00:00:00 2001 From: Max Chen Date: Wed, 11 Mar 2020 04:30:07 +0800 Subject: [PATCH 292/388] ENH: `Styler.highlight_null` can accept `subset` argument (#31350) --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/io/formats/style.py | 14 +++++++++++--- pandas/tests/io/formats/test_style.py | 17 +++++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index e745bf3f5feed..bd55a25b22e0d 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -64,6 +64,7 @@ Other enhancements ^^^^^^^^^^^^^^^^^^ - :class:`Styler` may now render CSS more efficiently where multiple cells have the same styling (:issue:`30876`) +- :meth:`Styler.highlight_null` now accepts a ``subset`` argument (:issue:`31345`) - When writing directly to a sqlite connection :func:`to_sql` now supports the ``multi`` method (:issue:`29921`) - `OptionError` is now exposed in `pandas.errors` (:issue:`27553`) - :func:`timedelta_range` will now infer a frequency when passed ``start``, ``stop``, and ``periods`` (:issue:`32377`) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 018441dacd9a8..9cdb56dc6362a 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1003,19 +1003,27 @@ def hide_columns(self, subset) -> "Styler": def _highlight_null(v, null_color: str) -> str: return f"background-color: {null_color}" if pd.isna(v) else "" - def highlight_null(self, null_color: str = "red") -> "Styler": + def highlight_null( + self, + null_color: str = "red", + subset: Optional[Union[Label, Sequence[Label]]] = None, + ) -> "Styler": """ Shade the background ``null_color`` for missing values. Parameters ---------- - null_color : str + null_color : str, default 'red' + subset : label or list of labels, default None + A valid slice for ``data`` to limit the style application to. + + ..
versionadded:: 1.1.0 Returns ------- self : Styler """ - self.applymap(self._highlight_null, null_color=null_color) + self.applymap(self._highlight_null, null_color=null_color, subset=subset) return self def background_gradient( diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index a2659079be7c0..a94667a3f0c34 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -1091,6 +1091,23 @@ def test_highlight_null(self, null_color="red"): expected = {(0, 0): [""], (1, 0): ["background-color: red"]} assert result == expected + def test_highlight_null_subset(self): + # GH 31345 + df = pd.DataFrame({"A": [0, np.nan], "B": [0, np.nan]}) + result = ( + df.style.highlight_null(null_color="red", subset=["A"]) + .highlight_null(null_color="green", subset=["B"]) + ._compute() + .ctx + ) + expected = { + (0, 0): [""], + (1, 0): ["background-color: red"], + (0, 1): [""], + (1, 1): ["background-color: green"], + } + assert result == expected + def test_nonunique_raises(self): df = pd.DataFrame([[1, 2]], columns=["A", "A"]) with pytest.raises(ValueError): From 79d3b085aaa87b70a43bad08cd5163532c733435 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 10 Mar 2020 15:46:42 -0500 Subject: [PATCH 293/388] DOC: cleanup 1.0.2 whatsnew (#32592) --- doc/source/whatsnew/v1.0.2.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 462f243f14494..9841df0507138 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -1,7 +1,7 @@ .. _whatsnew_102: -What's new in 1.0.2 (February ??, 2020) ---------------------------------------- +What's new in 1.0.2 (March 11, 2020) +------------------------------------ These are the changes in pandas 1.0.2. See :ref:`release` for a full changelog including other versions of pandas. @@ -18,13 +18,13 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.to_excel` when ``columns`` kwarg is passed (:issue:`31677`) - Fixed regression in :meth:`Series.align` when ``other`` is a DataFrame and ``method`` is not None (:issue:`31785`) - Fixed regression in :meth:`pandas.core.groupby.RollingGroupby.apply` where the ``raw`` parameter was ignored (:issue:`31754`) -- Fixed regression in :meth:`rolling(..).corr() ` when using a time offset (:issue:`31789`) -- Fixed regression in :meth:`DataFrameGroupBy.nunique` which was modifying the original values if ``NaN`` values were present (:issue:`31950`) +- Fixed regression in :meth:`pandas.core.window.Rolling.corr` when using a time offset (:issue:`31789`) +- Fixed regression in :meth:`pandas.core.groupby.DataFrameGroupBy.nunique` which was modifying the original values if ``NaN`` values were present (:issue:`31950`) - Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`). 
- Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) -- Fixed regression in :meth:`GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`) +- Fixed regression in :meth:`pandas.core.groupby.GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`) - Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`) -- Fixed bug in the repr of an object-dtype ``Index`` with bools and missing values (:issue:`32146`) +- Fixed bug in the repr of an object-dtype :class:`Index` with bools and missing values (:issue:`32146`) - .. --------------------------------------------------------------------------- @@ -82,7 +82,7 @@ Bug fixes - Fix bug in :meth:`DataFrame.convert_dtypes` for columns that were already using the ``"string"`` dtype (:issue:`31731`). - Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`) -- Fixed bug where :meth:`GroupBy.first` and :meth:`GroupBy.last` would raise a ``TypeError`` when groups contained ``pd.NA`` in a column of object dtype (:issue:`32123`) +- Fixed bug where :meth:`pandas.core.groupby.GroupBy.first` and :meth:`pandas.core.groupby.GroupBy.last` would raise a ``TypeError`` when groups contained ``pd.NA`` in a column of object dtype (:issue:`32123`) - Fix bug in :meth:`Series.convert_dtypes` for series with mix of integers and strings (:issue:`32117`) **Strings**
From 09a46a4e465b19fa688f1d58124d266c0bbbebf0 Mon Sep 17 00:00:00 2001 From: sagungrp Date: Wed, 11 Mar 2020 04:30:05 +0700 Subject: [PATCH 294/388] DOC: Add extended summary, update parameter types desc, update return types desc, and add whitespaces after commas in list declarations to DataFrame.first in core/generic.py (#32018) Co-authored-by: Sang Agung Co-authored-by: Marc Garcia --- pandas/core/generic.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e6c5ac9dbf733..b039630efc989 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8014,15 +8014,21 @@ def resample( def first(self: FrameOrSeries, offset) -> FrameOrSeries: """ - Method to subset initial periods of time series data based on a date offset. + Select initial periods of time series data based on a date offset. + + When a DataFrame has dates as its index, this function can + select the first few rows based on a date offset. Parameters ---------- - offset : str, DateOffset, dateutil.relativedelta + offset : str, DateOffset or dateutil.relativedelta + The offset length of the data that will be selected. For instance, + '1M' will select all the rows having their index within the first month. Returns ------- - subset : same type as caller + Series or DataFrame + A subset of the caller.
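# To make the selection rule above concrete (a hedged reading, using the frame
# built in the Examples section further down): ``ts.first('3D')`` keeps only
# the rows whose index falls within three days of the first entry, i.e.
# 2018-04-09 and 2018-04-11, while ``ts.first('1M')`` would keep all four rows.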
Raises ------ @@ -8038,7 +8044,7 @@ def first(self: FrameOrSeries, offset) -> FrameOrSeries: Examples -------- >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') - >>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i) + >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) >>> ts A 2018-04-09 1 From 1951c8ea6cd0d2a2732e4f9aecbc8e3de9c0364d Mon Sep 17 00:00:00 2001 From: Derek McCammond Date: Tue, 10 Mar 2020 19:07:47 -0400 Subject: [PATCH 295/388] CLN: Avoid bare pytest.raises in computation/test_eval.py (#32507) --- pandas/tests/computation/test_eval.py | 102 ++++++++++++++++---------- 1 file changed, 65 insertions(+), 37 deletions(-) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index a240e6cef5930..08d8d5ca342b7 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -375,7 +375,8 @@ def check_pow(self, lhs, arith1, rhs): and is_scalar(rhs) and _is_py3_complex_incompat(result, expected) ): - with pytest.raises(AssertionError): + msg = "(DataFrame.columns|numpy array) are different" + with pytest.raises(AssertionError, match=msg): tm.assert_numpy_array_equal(result, expected) else: tm.assert_almost_equal(result, expected) @@ -449,16 +450,19 @@ def test_frame_invert(self): # float always raises lhs = DataFrame(randn(5, 2)) if self.engine == "numexpr": - with pytest.raises(NotImplementedError): + msg = "couldn't find matching opcode for 'invert_dd'" + with pytest.raises(NotImplementedError, match=msg): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: - with pytest.raises(TypeError): + msg = "ufunc 'invert' not supported for the input types" + with pytest.raises(TypeError, match=msg): result = pd.eval(expr, engine=self.engine, parser=self.parser) # int raises on numexpr lhs = DataFrame(randint(5, size=(5, 2))) if self.engine == "numexpr": - with pytest.raises(NotImplementedError): + msg = "couldn't find matching opcode for 'invert" + with pytest.raises(NotImplementedError, match=msg): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: expect = ~lhs @@ -474,10 +478,11 @@ def test_frame_invert(self): # object raises lhs = DataFrame({"b": ["a", 1, 2.0], "c": rand(3) > 0.5}) if self.engine == "numexpr": - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="unknown type object"): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: - with pytest.raises(TypeError): + msg = "bad operand type for unary ~: 'str'" + with pytest.raises(TypeError, match=msg): result = pd.eval(expr, engine=self.engine, parser=self.parser) def test_series_invert(self): @@ -488,16 +493,19 @@ def test_series_invert(self): # float raises lhs = Series(randn(5)) if self.engine == "numexpr": - with pytest.raises(NotImplementedError): + msg = "couldn't find matching opcode for 'invert_dd'" + with pytest.raises(NotImplementedError, match=msg): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: - with pytest.raises(TypeError): + msg = "ufunc 'invert' not supported for the input types" + with pytest.raises(TypeError, match=msg): result = pd.eval(expr, engine=self.engine, parser=self.parser) # int raises on numexpr lhs = Series(randint(5, size=5)) if self.engine == "numexpr": - with pytest.raises(NotImplementedError): + msg = "couldn't find matching opcode for 'invert" + with pytest.raises(NotImplementedError, match=msg): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: expect = ~lhs @@ -517,10 +525,11 @@ def test_series_invert(self): # 
object lhs = Series(["a", 1, 2.0]) if self.engine == "numexpr": - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="unknown type object"): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: - with pytest.raises(TypeError): + msg = "bad operand type for unary ~: 'str'" + with pytest.raises(TypeError, match=msg): result = pd.eval(expr, engine=self.engine, parser=self.parser) def test_frame_negate(self): @@ -541,7 +550,8 @@ def test_frame_negate(self): # bool doesn't work with numexpr but works elsewhere lhs = DataFrame(rand(5, 2) > 0.5) if self.engine == "numexpr": - with pytest.raises(NotImplementedError): + msg = "couldn't find matching opcode for 'neg_bb'" + with pytest.raises(NotImplementedError, match=msg): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: expect = -lhs @@ -566,7 +576,8 @@ def test_series_negate(self): # bool doesn't work with numexpr but works elsewhere lhs = Series(rand(5) > 0.5) if self.engine == "numexpr": - with pytest.raises(NotImplementedError): + msg = "couldn't find matching opcode for 'neg_bb'" + with pytest.raises(NotImplementedError, match=msg): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: expect = -lhs @@ -610,7 +621,8 @@ def test_series_pos(self, lhs): tm.assert_series_equal(expect, result) def test_scalar_unary(self): - with pytest.raises(TypeError): + msg = "bad operand type for unary ~: 'float'" + with pytest.raises(TypeError, match=msg): pd.eval("~1.0", engine=self.engine, parser=self.parser) assert pd.eval("-1.0", parser=self.parser, engine=self.engine) == -1.0 @@ -671,7 +683,8 @@ def test_disallow_scalar_bool_ops(self): x, a, b, df = np.random.randn(3), 1, 2, DataFrame(randn(3, 2)) # noqa for ex in exprs: - with pytest.raises(NotImplementedError): + msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not" + with pytest.raises(NotImplementedError, match=msg): pd.eval(ex, engine=self.engine, parser=self.parser) def test_identical(self): @@ -772,7 +785,8 @@ def setup_ops(self): def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs): ex1 = f"lhs {cmp1} mid {cmp2} rhs" - with pytest.raises(NotImplementedError): + msg = "'BoolOp' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): pd.eval(ex1, engine=self.engine, parser=self.parser) @@ -1183,7 +1197,8 @@ def test_bool_ops_with_constants(self): def test_4d_ndarray_fails(self): x = randn(3, 4, 5, 6) y = Series(randn(10)) - with pytest.raises(NotImplementedError): + msg = "N-dimensional objects, where N > 2, are not supported with eval" + with pytest.raises(NotImplementedError, match=msg): self.eval("x + y", local_dict={"x": x, "y": y}) def test_constant(self): @@ -1232,7 +1247,7 @@ def test_truediv(self): def test_failing_subscript_with_name_error(self): df = DataFrame(np.random.randn(5, 3)) # noqa - with pytest.raises(NameError): + with pytest.raises(NameError, match="name 'x' is not defined"): self.eval("df[x > 2] > 2") def test_lhs_expression_subscript(self): @@ -1379,7 +1394,8 @@ def test_multi_line_expression(self): assert ans is None # multi-line not valid if not all assignments - with pytest.raises(ValueError): + msg = "Multi-line expressions are only valid if all expressions contain" + with pytest.raises(ValueError, match=msg): df.eval( """ a = b + 2 @@ -1474,7 +1490,8 @@ def test_assignment_in_query(self): # GH 8664 df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) df_orig = df.copy() - with pytest.raises(ValueError): + msg = "cannot assign without a target object" + with 
pytest.raises(ValueError, match=msg): df.query("a = 1") tm.assert_frame_equal(df, df_orig) @@ -1593,19 +1610,21 @@ def test_simple_in_ops(self): ) assert res else: - with pytest.raises(NotImplementedError): + msg = "'In' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): pd.eval("1 in [1, 2]", engine=self.engine, parser=self.parser) - with pytest.raises(NotImplementedError): + with pytest.raises(NotImplementedError, match=msg): pd.eval("2 in (1, 2)", engine=self.engine, parser=self.parser) - with pytest.raises(NotImplementedError): + with pytest.raises(NotImplementedError, match=msg): pd.eval("3 in (1, 2)", engine=self.engine, parser=self.parser) - with pytest.raises(NotImplementedError): - pd.eval("3 not in (1, 2)", engine=self.engine, parser=self.parser) - with pytest.raises(NotImplementedError): + with pytest.raises(NotImplementedError, match=msg): pd.eval( "[(3,)] in (1, 2, [(3,)])", engine=self.engine, parser=self.parser ) - with pytest.raises(NotImplementedError): + msg = "'NotIn' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + pd.eval("3 not in (1, 2)", engine=self.engine, parser=self.parser) + with pytest.raises(NotImplementedError, match=msg): pd.eval( "[3] not in (1, 2, [[3]])", engine=self.engine, parser=self.parser ) @@ -1664,13 +1683,15 @@ def test_fails_not(self): def test_fails_ampersand(self): df = DataFrame(np.random.randn(5, 3)) # noqa ex = "(df + 2)[df > 1] > 0 & (df > 0)" - with pytest.raises(NotImplementedError): + msg = "cannot evaluate scalar only bool ops" + with pytest.raises(NotImplementedError, match=msg): pd.eval(ex, parser=self.parser, engine=self.engine) def test_fails_pipe(self): df = DataFrame(np.random.randn(5, 3)) # noqa ex = "(df + 2)[df > 1] > 0 | (df > 0)" - with pytest.raises(NotImplementedError): + msg = "cannot evaluate scalar only bool ops" + with pytest.raises(NotImplementedError, match=msg): pd.eval(ex, parser=self.parser, engine=self.engine) def test_bool_ops_with_constants(self): @@ -1679,7 +1700,8 @@ def test_bool_ops_with_constants(self): ): ex = f"{lhs} {op} {rhs}" if op in ("and", "or"): - with pytest.raises(NotImplementedError): + msg = "'BoolOp' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): self.eval(ex) else: res = self.eval(ex) @@ -1690,7 +1712,8 @@ def test_simple_bool_ops(self): for op, lhs, rhs in product(expr._bool_ops_syms, (True, False), (True, False)): ex = f"lhs {op} rhs" if op in ("and", "or"): - with pytest.raises(NotImplementedError): + msg = "'BoolOp' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): pd.eval(ex, engine=self.engine, parser=self.parser) else: res = pd.eval(ex, engine=self.engine, parser=self.parser) @@ -1902,19 +1925,21 @@ def test_disallowed_nodes(engine, parser): inst = VisitorClass("x + 1", engine, parser) for ops in uns_ops: - with pytest.raises(NotImplementedError): + msg = "nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): getattr(inst, ops)() def test_syntax_error_exprs(engine, parser): e = "s +" - with pytest.raises(SyntaxError): + with pytest.raises(SyntaxError, match="invalid syntax"): pd.eval(e, engine=engine, parser=parser) def test_name_error_exprs(engine, parser): e = "s + t" - with pytest.raises(NameError): + msg = "name 's' is not defined" + with pytest.raises(NameError, match=msg): pd.eval(e, engine=engine, parser=parser) @@ -1973,7 +1998,8 @@ def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser): ex2 = f"lhs {cmp} mid and 
mid {cmp} rhs" ex3 = f"(lhs {cmp} mid) & (mid {cmp} rhs)" for ex in (ex1, ex2, ex3): - with pytest.raises(NotImplementedError): + msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not" + with pytest.raises(NotImplementedError, match=msg): pd.eval(ex, engine=engine, parser=parser) @@ -2029,7 +2055,8 @@ def test_negate_lt_eq_le(engine, parser): tm.assert_frame_equal(result, expected) if parser == "python": - with pytest.raises(NotImplementedError): + msg = "'Not' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): df.query("not (cat > 0)", engine=engine, parser=parser) else: result = df.query("not (cat > 0)", engine=engine, parser=parser) @@ -2041,5 +2068,6 @@ def test_validate_bool_args(self): invalid_values = [1, "True", [1, 2, 3], 5.0] for value in invalid_values: - with pytest.raises(ValueError): + msg = 'For argument "inplace" expected type bool, received type' + with pytest.raises(ValueError, match=msg): pd.eval("2+2", inplace=value) From ff6c8a0af524427b5fe31ba35befdf7f10325145 Mon Sep 17 00:00:00 2001 From: Farhan Reynaldo Date: Wed, 11 Mar 2020 08:35:49 +0700 Subject: [PATCH 296/388] DOC: Fix EX01 in pandas.DataFrame.idxmax (#32551) --- pandas/core/frame.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cd5d81bc70dd9..60fc69e8222d6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8029,6 +8029,35 @@ def idxmax(self, axis=0, skipna=True) -> Series: Notes ----- This method is the DataFrame version of ``ndarray.argmax``. + + Examples + -------- + Consider a dataset containing food consumption in Argentina. + + >>> df = pd.DataFrame({'consumption': [10.51, 103.11, 55.48], + ... 'co2_emissions': [37.2, 19.66, 1712]}, + ... index=['Pork', 'Wheat Products', 'Beef']) + + >>> df + consumption co2_emissions + Pork 10.51 37.20 + Wheat Products 103.11 19.66 + Beef 55.48 1712.00 + + By default, it returns the index for the maximum value in each column. + + >>> df.idxmax() + consumption Wheat Products + co2_emissions Beef + dtype: object + + To return the index for the maximum value in each row, use ``axis="columns"``. 
+ + >>> df.idxmax(axis="columns") + Pork co2_emissions + Wheat Products consumption + Beef co2_emissions + dtype: object """ axis = self._get_axis_number(axis) indices = nanops.nanargmax(self.values, axis=axis, skipna=skipna) From 650cf74325bcbd08d0d41d439ae26ba682789f9f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 10 Mar 2020 20:45:53 -0500 Subject: [PATCH 297/388] Better error message for OOB result (#32499) --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/_libs/tslibs/c_timestamp.pyx | 13 ++++++++++++- pandas/tests/scalar/timestamp/test_arithmetic.py | 14 ++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 9841df0507138..88ee732ded071 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -65,6 +65,7 @@ Bug fixes - Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with a tz-aware index (:issue:`26683`) - Bug where :func:`to_datetime` would raise when passed ``pd.NA`` (:issue:`32213`) +- Improved error message when subtracting two :class:`Timestamp` that result in an out-of-bounds :class:`Timedelta` (:issue:`31774`) **Categorical** diff --git a/pandas/_libs/tslibs/c_timestamp.pyx b/pandas/_libs/tslibs/c_timestamp.pyx index 2c72cec18f096..3c30460a74ece 100644 --- a/pandas/_libs/tslibs/c_timestamp.pyx +++ b/pandas/_libs/tslibs/c_timestamp.pyx @@ -286,6 +286,10 @@ cdef class _Timestamp(datetime): # coerce if necessary if we are a Timestamp-like if (PyDateTime_Check(self) and (PyDateTime_Check(other) or is_datetime64_object(other))): + # both_timestamps is to determine whether Timedelta(self - other) + # should raise the OOB error, or fall back returning a timedelta. + both_timestamps = (isinstance(other, _Timestamp) and + isinstance(self, _Timestamp)) if isinstance(self, _Timestamp): other = type(self)(other) else: @@ -301,7 +305,14 @@ cdef class _Timestamp(datetime): from pandas._libs.tslibs.timedeltas import Timedelta try: return Timedelta(self.value - other.value) - except (OverflowError, OutOfBoundsDatetime): + except (OverflowError, OutOfBoundsDatetime) as err: + if isinstance(other, _Timestamp): + if both_timestamps: + raise OutOfBoundsDatetime( + "Result is too large for pandas.Timedelta. Convert inputs " + "to datetime.datetime with 'Timestamp.to_pydatetime()' " + "before subtracting." 
+ ) from err pass elif is_datetime64_object(self): # GH#28286 cython semantics for __rsub__, `other` is actually diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index 1cab007c20a0e..ccd7bf721430a 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas.errors import OutOfBoundsDatetime + from pandas import Timedelta, Timestamp from pandas.tseries import offsets @@ -60,6 +62,18 @@ def test_overflow_offset_raises(self): with pytest.raises(OverflowError, match=msg): stamp - offset_overflow + def test_overflow_timestamp_raises(self): + # https://github.com/pandas-dev/pandas/issues/31774 + msg = "Result is too large" + a = Timestamp("2101-01-01 00:00:00") + b = Timestamp("1688-01-01 00:00:00") + + with pytest.raises(OutOfBoundsDatetime, match=msg): + a - b + + # but we're OK for timestamp and datetime.datetime + assert (a - b.to_pydatetime()) == (a.to_pydatetime() - b) + def test_delta_preserve_nanos(self): val = Timestamp(1337299200000000123) result = val + timedelta(1) From a2acd1b2c662e6ae87e923989df7c0c95444f6d7 Mon Sep 17 00:00:00 2001 From: Anna Daglis <40292327+AnnaDaglis@users.noreply.github.com> Date: Wed, 11 Mar 2020 01:48:34 +0000 Subject: [PATCH 298/388] BUG: Fix bug, where BooleanDtype columns are converted to Int64 (#32490) --- doc/source/whatsnew/v1.0.2.rst | 2 ++ pandas/core/arrays/datetimes.py | 2 ++ pandas/core/dtypes/cast.py | 11 ++++------- pandas/core/internals/blocks.py | 3 +++ pandas/tests/arrays/test_datetimes.py | 12 ++++++++++++ pandas/tests/series/methods/test_convert_dtypes.py | 5 +++++ 6 files changed, 28 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 88ee732ded071..8868f2ffa0a97 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -64,6 +64,7 @@ Bug fixes **Datetimelike** - Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with a tz-aware index (:issue:`26683`) +- Bug in :meth:`Series.astype` not copying for tz-naive and tz-aware datetime64 dtype (:issue:`32490`) - Bug where :func:`to_datetime` would raise when passed ``pd.NA`` (:issue:`32213`) - Improved error message when subtracting two :class:`Timestamp` that result in an out-of-bounds :class:`Timedelta` (:issue:`31774`) @@ -85,6 +86,7 @@ Bug fixes - Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`) - Fixed bug where :meth:`pandas.core.groupby.GroupBy.first` and :meth:`pandas.core.groupby.GroupBy.last` would raise a ``TypeError`` when groups contained ``pd.NA`` in a column of object dtype (:issue:`32123`) - Fix bug in :meth:`Series.convert_dtypes` for series with mix of integers and strings (:issue:`32117`) +- Fixed bug in :meth:`DataFrame.convert_dtypes`, where ``BooleanDtype`` columns were converted to ``Int64`` (:issue:`32287`) **Strings** diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 9f19c7ba0be6e..7223eda22b3d9 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -587,6 +587,8 @@ def astype(self, dtype, copy=True): if getattr(self.dtype, "tz", None) is None: return self.tz_localize(new_tz) result = self.tz_convert(new_tz) + if copy: + result = result.copy() if new_tz is None: # Do we want .astype('datetime64[ns]') to be an ndarray. 
# The astype in Block._astype expects this to return an diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 7dac36b53fce5..97c02428cbdf9 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1049,7 +1049,8 @@ def convert_dtypes( dtype new dtype """ - if convert_string or convert_integer or convert_boolean: + is_extension = is_extension_array_dtype(input_array.dtype) + if (convert_string or convert_integer or convert_boolean) and not is_extension: try: inferred_dtype = lib.infer_dtype(input_array) except ValueError: @@ -1062,9 +1063,7 @@ def convert_dtypes( if convert_integer: target_int_dtype = "Int64" - if is_integer_dtype(input_array.dtype) and not is_extension_array_dtype( - input_array.dtype - ): + if is_integer_dtype(input_array.dtype): from pandas.core.arrays.integer import _dtypes inferred_dtype = _dtypes.get(input_array.dtype.name, target_int_dtype) @@ -1078,9 +1077,7 @@ def convert_dtypes( inferred_dtype = input_array.dtype if convert_boolean: - if is_bool_dtype(input_array.dtype) and not is_extension_array_dtype( - input_array.dtype - ): + if is_bool_dtype(input_array.dtype): inferred_dtype = "boolean" else: if isinstance(inferred_dtype, str) and inferred_dtype == "boolean": diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c088b7020927b..bce440fe09319 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2228,6 +2228,9 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): # if we are passed a datetime64[ns, tz] if is_datetime64tz_dtype(dtype): values = self.values + if copy: + # this should be the only copy + values = values.copy() if getattr(values, "tz", None) is None: values = DatetimeArray(values).tz_localize("UTC") values = values.tz_convert(dtype.tz) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index a59ed429cc404..2c26e72a245f7 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -151,6 +151,18 @@ def test_astype_to_same(self): result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False) assert result is arr + @pytest.mark.parametrize("dtype", ["datetime64[ns]", "datetime64[ns, UTC]"]) + @pytest.mark.parametrize( + "other", ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, CET]"] + ) + def test_astype_copies(self, dtype, other): + # https://github.com/pandas-dev/pandas/pull/32490 + s = pd.Series([1, 2], dtype=dtype) + orig = s.copy() + t = s.astype(other) + t[:] = pd.NaT + tm.assert_series_equal(s, orig) + @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) def test_astype_int(self, dtype): arr = DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")]) diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 17527a09f07a1..a41f893e3753f 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -279,3 +279,8 @@ def test_convert_string_dtype(self): ) result = df.convert_dtypes() tm.assert_frame_equal(df, result) + + def test_convert_bool_dtype(self): + # GH32287 + df = pd.DataFrame({"A": pd.array([True])}) + tm.assert_frame_equal(df, df.convert_dtypes()) From 5fadbe40e223f4d30ac9978edbdfea12fe06399a Mon Sep 17 00:00:00 2001 From: Derek McCammond Date: Tue, 10 Mar 2020 21:50:42 -0400 Subject: [PATCH 299/388] Avoid bare pytest.raises in 
dtypes/cast/test_upcast.py (#32603) --- pandas/tests/dtypes/cast/test_upcast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/cast/test_upcast.py b/pandas/tests/dtypes/cast/test_upcast.py index bb7a7d059c7ee..f9227a4e78a79 100644 --- a/pandas/tests/dtypes/cast/test_upcast.py +++ b/pandas/tests/dtypes/cast/test_upcast.py @@ -12,7 +12,7 @@ def test_upcast_error(result): # GH23823 require result arg to be ndarray mask = np.array([False, True, False]) other = np.array([61, 62, 63]) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="The result input must be a ndarray"): result, _ = maybe_upcast_putmask(result, mask, other) From 8bc471aca858afcdbf43ac3896e7768e8186fd1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20=C5=A0koda?= Date: Wed, 11 Mar 2020 02:51:03 +0100 Subject: [PATCH 300/388] BUG: Fix rolling functions with variable windows on decreasing index (#32386) --- doc/source/whatsnew/v1.0.2.rst | 4 +++ pandas/_libs/window/aggregations.pyx | 4 +-- pandas/_libs/window/indexers.pyx | 10 ++++--- pandas/tests/window/test_timeseries_window.py | 27 +++++++++++-------- 4 files changed, 29 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 8868f2ffa0a97..f00bca3e6bcc9 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -92,6 +92,10 @@ Bug fixes - Using ``pd.NA`` with :meth:`Series.str.repeat` now correctly outputs a null value instead of raising error for vector inputs (:issue:`31632`) +**Rolling** + +- Fixed rolling operations with variable window (defined by time duration) on decreasing time index (:issue:`32385`). + .. --------------------------------------------------------------------------- .. _whatsnew_102.contributors: diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 80b9144042041..a90d2f77e44d1 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1013,7 +1013,7 @@ def roll_max_variable(ndarray[float64_t] values, ndarray[int64_t] start, def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp, int64_t win): """ - Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. + Moving min of 1d array of any numeric type along axis=0 ignoring NaNs. Parameters ---------- @@ -1030,7 +1030,7 @@ def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start, def roll_min_variable(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp): """ - Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. + Moving min of 1d array of any numeric type along axis=0 ignoring NaNs. 
Parameters ---------- diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index 2d01d1964c043..8a1e7feb57ace 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -44,6 +44,7 @@ def calculate_variable_window_bounds( cdef: bint left_closed = False bint right_closed = False + int index_growth_sign = 1 ndarray[int64_t, ndim=1] start, end int64_t start_bound, end_bound Py_ssize_t i, j @@ -58,6 +59,9 @@ def calculate_variable_window_bounds( if closed in ['left', 'both']: left_closed = True + if index[num_values - 1] < index[0]: + index_growth_sign = -1 + start = np.empty(num_values, dtype='int64') start.fill(-1) end = np.empty(num_values, dtype='int64') @@ -78,7 +82,7 @@ def calculate_variable_window_bounds( # end is end of slice interval (not including) for i in range(1, num_values): end_bound = index[i] - start_bound = index[i] - window_size + start_bound = index[i] - index_growth_sign * window_size # left endpoint is closed if left_closed: @@ -88,13 +92,13 @@ def calculate_variable_window_bounds( # within the constraint start[i] = i for j in range(start[i - 1], i): - if index[j] > start_bound: + if (index[j] - start_bound) * index_growth_sign > 0: start[i] = j break # end bound is previous end # or current index - if index[end[i - 1]] <= end_bound: + if (index[end[i - 1]] - end_bound) * index_growth_sign <= 0: end[i] = i + 1 else: end[i] = end[i - 1] diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index 5f5e10b5dd497..0c5289cd78fed 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -709,20 +709,25 @@ def test_rolling_cov_offset(self): tm.assert_series_equal(result, expected2) def test_rolling_on_decreasing_index(self): - # GH-19248 + # GH-19248, GH-32385 index = [ - Timestamp("20190101 09:00:00"), - Timestamp("20190101 09:00:02"), - Timestamp("20190101 09:00:03"), - Timestamp("20190101 09:00:05"), - Timestamp("20190101 09:00:06"), + Timestamp("20190101 09:00:30"), + Timestamp("20190101 09:00:27"), + Timestamp("20190101 09:00:20"), + Timestamp("20190101 09:00:18"), + Timestamp("20190101 09:00:10"), ] - df = DataFrame({"column": [3, 4, 4, 2, 1]}, index=reversed(index)) - result = df.rolling("2s").min() - expected = DataFrame( - {"column": [3.0, 3.0, 3.0, 2.0, 1.0]}, index=reversed(index) - ) + df = DataFrame({"column": [3, 4, 4, 5, 6]}, index=index) + result = df.rolling("5s").min() + expected = DataFrame({"column": [3.0, 3.0, 4.0, 4.0, 6.0]}, index=index) + tm.assert_frame_equal(result, expected) + + def test_rolling_on_empty(self): + # GH-32385 + df = DataFrame({"column": []}, index=[]) + result = df.rolling("5s").min() + expected = DataFrame({"column": []}, index=[]) tm.assert_frame_equal(result, expected) def test_rolling_on_multi_index_level(self): From f33120cff1c13910c8b63e85bffe47dfced93bc6 Mon Sep 17 00:00:00 2001 From: tolhassianipar <45849317+tolhassianipar@users.noreply.github.com> Date: Wed, 11 Mar 2020 09:00:35 +0700 Subject: [PATCH 301/388] DOC: Fixed errors in pandas.DataFrame.asfreq PR07, RT02, RT03, SA04 (#32362) --- pandas/core/generic.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b039630efc989..add2215645f2f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7485,6 +7485,7 @@ def asfreq( Parameters ---------- freq : DateOffset or str + Frequency DateOffset or string. 
method : {'backfill'/'bfill', 'pad'/'ffill'}, default None Method to use for filling holes in reindexed Series (note this does not fill NaNs that already were present): @@ -7502,11 +7503,12 @@ def asfreq( Returns ------- - converted : same type as caller + Same type as caller + Object converted to the specified frequency. See Also -------- - reindex + reindex : Conform DataFrame to new index with optional filling logic. Notes ----- From d4815a59ea4789536bd832668434289bb7bab0be Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 19:13:23 -0700 Subject: [PATCH 302/388] CLN: use _values_for_argsort for join_non_unique, join_monotonic (#32467) --- pandas/core/arrays/categorical.py | 2 +- pandas/core/indexes/base.py | 30 +++++++++++++++++++++++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index ba4c2e168e0c4..9dfe68b16ad6a 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1488,7 +1488,7 @@ def check_for_ordered(self, op): ) def _values_for_argsort(self): - return self._codes.copy() + return self._codes def argsort(self, ascending=True, kind="quicksort", **kwargs): """ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3eab757311ccb..93ed301566ddc 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3395,6 +3395,7 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False) ------- join_index, (left_indexer, right_indexer) """ + other = ensure_index(other) self_is_mi = isinstance(self, ABCMultiIndex) other_is_mi = isinstance(other, ABCMultiIndex) @@ -3414,8 +3415,6 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False) other, level, how=how, return_indexers=return_indexers ) - other = ensure_index(other) - if len(other) == 0 and how in ("left", "outer"): join_index = self._shallow_copy() if return_indexers: @@ -3577,16 +3576,26 @@ def _join_multi(self, other, how, return_indexers=True): def _join_non_unique(self, other, how="left", return_indexers=False): from pandas.core.reshape.merge import _get_join_indexers + # We only get here if dtypes match + assert self.dtype == other.dtype + + if is_extension_array_dtype(self.dtype): + lvalues = self._data._values_for_argsort() + rvalues = other._data._values_for_argsort() + else: + lvalues = self._values + rvalues = other._values + left_idx, right_idx = _get_join_indexers( - [self._ndarray_values], [other._ndarray_values], how=how, sort=True + [lvalues], [rvalues], how=how, sort=True ) left_idx = ensure_platform_int(left_idx) right_idx = ensure_platform_int(right_idx) - join_index = np.asarray(self._ndarray_values.take(left_idx)) + join_index = np.asarray(lvalues.take(left_idx)) mask = left_idx == -1 - np.putmask(join_index, mask, other._ndarray_values.take(right_idx)) + np.putmask(join_index, mask, rvalues.take(right_idx)) join_index = self._wrap_joined_index(join_index, other) @@ -3737,6 +3746,9 @@ def _get_leaf_sorter(labels): return join_index def _join_monotonic(self, other, how="left", return_indexers=False): + # We only get here with matching dtypes + assert other.dtype == self.dtype + if self.equals(other): ret_index = other if how == "right" else self if return_indexers: @@ -3744,8 +3756,12 @@ def _join_monotonic(self, other, how="left", return_indexers=False): else: return ret_index - sv = self._ndarray_values - ov = other._ndarray_values + if is_extension_array_dtype(self.dtype): + sv = 
self._data._values_for_argsort() + ov = other._data._values_for_argsort() + else: + sv = self._values + ov = other._values if self.is_unique and other.is_unique: # We can perform much better than the general case From 5ebbb56509b9c3d5adbd48df2264faca7f179e18 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 19:19:30 -0700 Subject: [PATCH 303/388] CLN: avoid _internal_get_values in pandas._testing (#32570) --- pandas/_testing.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index 62a6dacac3b0c..136dfbd40276d 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -1018,7 +1018,8 @@ def assert_extension_array_equal( if hasattr(left, "asi8") and type(right) == type(left): # Avoid slow object-dtype comparisons - assert_numpy_array_equal(left.asi8, right.asi8) + # np.asarray for case where we have a np.MaskedArray + assert_numpy_array_equal(np.asarray(left.asi8), np.asarray(right.asi8)) return left_na = np.asarray(left.isna()) @@ -1156,20 +1157,23 @@ def assert_series_equal( raise AssertionError(msg) elif is_interval_dtype(left.dtype) or is_interval_dtype(right.dtype): assert_interval_array_equal(left.array, right.array) - elif is_datetime64tz_dtype(left.dtype): - # .values is an ndarray, but ._values is the ExtensionArray. + elif is_categorical_dtype(left.dtype) or is_categorical_dtype(right.dtype): + _testing.assert_almost_equal( + left._values, + right._values, + check_less_precise=check_less_precise, + check_dtype=check_dtype, + obj=str(obj), + ) + elif is_extension_array_dtype(left.dtype) or is_extension_array_dtype(right.dtype): + assert_extension_array_equal(left._values, right._values) + elif needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype): + # DatetimeArray or TimedeltaArray assert_extension_array_equal(left._values, right._values) - elif ( - is_extension_array_dtype(left) - and not is_categorical_dtype(left) - and is_extension_array_dtype(right) - and not is_categorical_dtype(right) - ): - assert_extension_array_equal(left.array, right.array) else: _testing.assert_almost_equal( - left._internal_get_values(), - right._internal_get_values(), + left._values, + right._values, check_less_precise=check_less_precise, check_dtype=check_dtype, obj=str(obj), From 8e743253bc29cafb12ae026b7c729b00475e61c4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 19:20:32 -0700 Subject: [PATCH 304/388] CLN: remove SingleBlockManager.get_values (#32522) --- pandas/core/internals/managers.py | 4 ---- pandas/core/series.py | 3 ++- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 98afc5ac3a0e3..b21b3d1b475d9 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1596,10 +1596,6 @@ def internal_values(self): """The array that Series._values returns""" return self._block.internal_values() - def get_values(self) -> np.ndarray: - """ return a dense type view """ - return np.array(self._block.to_dense(), copy=False) - @property def _can_hold_na(self) -> bool: return self._block._can_hold_na diff --git a/pandas/core/series.py b/pandas/core/series.py index 568e99622dd29..96177da1698a5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -537,7 +537,8 @@ def _internal_get_values(self): numpy.ndarray Data of the Series. 
""" - return self._data.get_values() + blk = self._data._block + return np.array(blk.to_dense(), copy=False) # ops def ravel(self, order="C"): From b9762eb931ff6f3208d805dfad7a135528008c69 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 10 Mar 2020 19:22:01 -0700 Subject: [PATCH 305/388] REG: Restore read_csv function for some file-likes (#32577) Restores behavior down to the fact that the Python engine cannot handle NamedTemporaryFile. Closes https://github.com/pandas-dev/pandas/issues/31819 Credit to @sasanquaneuf for originating idea. --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/_libs/parsers.pyx | 3 ++- pandas/io/parsers.py | 4 ++-- pandas/tests/io/parser/test_encoding.py | 23 +++++++++++++++++++++++ 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index f00bca3e6bcc9..d99ce468d402f 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -25,6 +25,7 @@ Fixed regressions - Fixed regression in :meth:`pandas.core.groupby.GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`) - Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`) - Fixed bug in the repr of an object-dtype :class:`Index` with bools and missing values (:issue:`32146`) +- Fixed regression in :meth:`read_csv` in which the ``encoding`` option was not recognized with certain file-like objects (:issue:`31819`) - .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 2fd227694800c..3a42a64046abd 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -638,7 +638,8 @@ cdef class TextReader: raise ValueError(f'Unrecognized compression type: ' f'{self.compression}') - if self.encoding and isinstance(source, (io.BufferedIOBase, io.RawIOBase)): + if (self.encoding and hasattr(source, "read") and + not hasattr(source, "encoding")): source = io.TextIOWrapper( source, self.encoding.decode('utf-8'), newline='') diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index bc2fb9f0f41bc..50b5db0274aa5 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -5,7 +5,7 @@ from collections import abc, defaultdict import csv import datetime -from io import BufferedIOBase, RawIOBase, StringIO, TextIOWrapper +from io import StringIO, TextIOWrapper import re import sys from textwrap import fill @@ -1870,7 +1870,7 @@ def __init__(self, src, **kwds): # Handle the file object with universal line mode enabled. # We will handle the newline character ourselves later on. 
- if isinstance(src, (BufferedIOBase, RawIOBase)): + if hasattr(src, "read") and not hasattr(src, "encoding"): src = TextIOWrapper(src, encoding=encoding, newline="") kwds["encoding"] = "utf-8" diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 3661e4e056db2..13b74cf29f857 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -5,6 +5,7 @@ from io import BytesIO import os +import tempfile import numpy as np import pytest @@ -174,3 +175,25 @@ def test_encoding_temp_file(all_parsers, utf_value, encoding_fmt, pass_encoding) result = parser.read_csv(f, encoding=encoding if pass_encoding else None) tm.assert_frame_equal(result, expected) + + +def test_encoding_named_temp_file(all_parsers): + # see gh-31819 + parser = all_parsers + encoding = "shift-jis" + + if parser.engine == "python": + pytest.skip("NamedTemporaryFile does not work with Python engine") + + title = "てすと" + data = "こむ" + + expected = DataFrame({title: [data]}) + + with tempfile.NamedTemporaryFile() as f: + f.write(f"{title}\n{data}".encode(encoding)) + + f.seek(0) + + result = parser.read_csv(f, encoding=encoding) + tm.assert_frame_equal(result, expected) From 9a31bdd5f70485cce19977faf6247e670897b1d1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 19:22:42 -0700 Subject: [PATCH 306/388] CLN: rename get_block_values, simplify (#32521) --- pandas/_libs/src/ujson/python/objToJSON.c | 4 ++-- pandas/core/internals/blocks.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 8cfc20ffd2c1c..1d17cce0bc3a9 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -237,9 +237,9 @@ static PyObject *get_values(PyObject *obj) { } } - if ((values == NULL) && PyObject_HasAttrString(obj, "get_block_values")) { + if ((values == NULL) && PyObject_HasAttrString(obj, "get_block_values_for_json")) { PRINTMARK(); - values = PyObject_CallMethod(obj, "get_block_values", NULL); + values = PyObject_CallMethod(obj, "get_block_values_for_json", NULL); if (values == NULL) { // Clear so we can subsequently try another method diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index bce440fe09319..024d3b205df77 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -231,11 +231,12 @@ def get_values(self, dtype=None): return self.values.astype(object) return self.values - def get_block_values(self, dtype=None): + def get_block_values_for_json(self) -> np.ndarray: """ - This is used in the JSON C code + This is used in the JSON C code. 
""" - return self.get_values(dtype=dtype) + # TODO(2DEA): reshape will be unnecessary with 2D EAs + return np.asarray(self.values).reshape(self.shape) def to_dense(self): return self.values.view() From c910ec36719420426b3ebe1a5cee87852f224dec Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 19:27:14 -0700 Subject: [PATCH 307/388] TST: stricter tests, avoid check_categorical=False, check_less_precise (#32571) --- pandas/tests/frame/test_analytics.py | 29 +++-------------- pandas/tests/frame/test_to_csv.py | 8 ++--- pandas/tests/generic/test_series.py | 4 +-- pandas/tests/groupby/test_function.py | 2 +- pandas/tests/io/excel/test_writers.py | 2 +- pandas/tests/io/test_sql.py | 4 +-- pandas/tests/io/test_stata.py | 41 +++++++++++++++++------- pandas/tests/series/test_constructors.py | 2 +- 8 files changed, 43 insertions(+), 49 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 07e30d41c216d..f4a10abea9757 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -32,7 +32,6 @@ def assert_stat_op_calc( has_skipna=True, check_dtype=True, check_dates=False, - check_less_precise=False, skipna_alternative=None, ): """ @@ -54,9 +53,6 @@ def assert_stat_op_calc( "alternative(frame)" should be checked. check_dates : bool, default false Whether opname should be tested on a Datetime Series - check_less_precise : bool, default False - Whether results should only be compared approximately; - passed on to tm.assert_series_equal skipna_alternative : function, default None NaN-safe version of alternative """ @@ -84,17 +80,11 @@ def wrapper(x): result0 = f(axis=0, skipna=False) result1 = f(axis=1, skipna=False) tm.assert_series_equal( - result0, - frame.apply(wrapper), - check_dtype=check_dtype, - check_less_precise=check_less_precise, + result0, frame.apply(wrapper), check_dtype=check_dtype, ) # HACK: win32 tm.assert_series_equal( - result1, - frame.apply(wrapper, axis=1), - check_dtype=False, - check_less_precise=check_less_precise, + result1, frame.apply(wrapper, axis=1), check_dtype=False, ) else: skipna_wrapper = alternative @@ -102,17 +92,12 @@ def wrapper(x): result0 = f(axis=0) result1 = f(axis=1) tm.assert_series_equal( - result0, - frame.apply(skipna_wrapper), - check_dtype=check_dtype, - check_less_precise=check_less_precise, + result0, frame.apply(skipna_wrapper), check_dtype=check_dtype, ) if opname in ["sum", "prod"]: expected = frame.apply(skipna_wrapper, axis=1) - tm.assert_series_equal( - result1, expected, check_dtype=False, check_less_precise=check_less_precise - ) + tm.assert_series_equal(result1, expected, check_dtype=False) # check dtypes if check_dtype: @@ -333,11 +318,7 @@ def kurt(x): # mixed types (with upcasting happening) assert_stat_op_calc( - "sum", - np.sum, - mixed_float_frame.astype("float32"), - check_dtype=False, - check_less_precise=True, + "sum", np.sum, mixed_float_frame.astype("float32"), check_dtype=False, ) assert_stat_op_calc( diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index cec2bd4b634c1..a49da7a5ec2fc 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -250,9 +250,7 @@ def make_dtnat_arr(n, nnat=None): df.to_csv(pth, chunksize=chunksize) recons = self.read_csv(pth)._convert(datetime=True, coerce=True) - tm.assert_frame_equal( - df, recons, check_names=False, check_less_precise=True - ) + tm.assert_frame_equal(df, recons, check_names=False) @pytest.mark.slow def 
test_to_csv_moar(self): @@ -354,9 +352,7 @@ def _to_uni(x): recons.columns = np.array(recons.columns, dtype=c_dtype) df.columns = np.array(df.columns, dtype=c_dtype) - tm.assert_frame_equal( - df, recons, check_names=False, check_less_precise=True - ) + tm.assert_frame_equal(df, recons, check_names=False) N = 100 chunksize = 1000 diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py index f119eb422a276..388bb8e3f636d 100644 --- a/pandas/tests/generic/test_series.py +++ b/pandas/tests/generic/test_series.py @@ -237,9 +237,7 @@ def test_to_xarray_index_types(self, index): assert isinstance(result, DataArray) # idempotency - tm.assert_series_equal( - result.to_series(), s, check_index_type=False, check_categorical=True - ) + tm.assert_series_equal(result.to_series(), s, check_index_type=False) @td.skip_if_no("xarray", min_version="0.7.0") def test_to_xarray(self): diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 83080aa98648f..03278e69fe94a 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -661,7 +661,7 @@ def test_nlargest_mi_grouper(): ] expected = Series(exp_values, index=exp_idx) - tm.assert_series_equal(result, expected, check_exact=False, check_less_precise=True) + tm.assert_series_equal(result, expected, check_exact=False) def test_nsmallest(): diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 506d223dbedb4..59899673cfc31 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -564,7 +564,7 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path): reader = ExcelFile(path) recons = pd.read_excel(reader, "test1", index_col=[0, 1]) - tm.assert_frame_equal(df, recons, check_less_precise=True) + tm.assert_frame_equal(df, recons) def test_excel_roundtrip_indexname(self, merge_cells, path): df = DataFrame(np.random.randn(10, 4)) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index fc3876eee9d66..86502a67e1869 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2372,7 +2372,7 @@ def test_write_row_by_row(self): result = sql.read_sql("select * from test", con=self.conn) result.index = frame.index - tm.assert_frame_equal(result, frame, check_less_precise=True) + tm.assert_frame_equal(result, frame) def test_execute(self): frame = tm.makeTimeDataFrame() @@ -2632,7 +2632,7 @@ def test_write_row_by_row(self): result = sql.read_sql("select * from test", con=self.conn) result.index = frame.index - tm.assert_frame_equal(result, frame, check_less_precise=True) + tm.assert_frame_equal(result, frame) def test_chunksize_read_type(self): frame = tm.makeTimeDataFrame() diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index b65efac2bd527..3efac9cd605a8 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -254,12 +254,21 @@ def test_read_dta4(self, file): ) # these are all categoricals - expected = pd.concat( - [expected[col].astype("category") for col in expected], axis=1 - ) + for col in expected: + orig = expected[col].copy() + + categories = np.asarray(expected["fully_labeled"][orig.notna()]) + if col == "incompletely_labeled": + categories = orig + + cat = orig.astype("category")._values + cat = cat.set_categories(categories, ordered=True) + cat.categories.rename(None, inplace=True) + + expected[col] = cat # stata doesn't save .category metadata - tm.assert_frame_equal(parsed, 
expected, check_categorical=False) + tm.assert_frame_equal(parsed, expected) # File containing strls def test_read_dta12(self): @@ -952,19 +961,27 @@ def test_categorical_writing(self, version): original = pd.concat( [original[col].astype("category") for col in original], axis=1 ) + expected.index.name = "index" expected["incompletely_labeled"] = expected["incompletely_labeled"].apply(str) expected["unlabeled"] = expected["unlabeled"].apply(str) - expected = pd.concat( - [expected[col].astype("category") for col in expected], axis=1 - ) - expected.index.name = "index" + for col in expected: + orig = expected[col].copy() + + cat = orig.astype("category")._values + cat = cat.as_ordered() + if col == "unlabeled": + cat = cat.set_categories(orig, ordered=True) + + cat.categories.rename(None, inplace=True) + + expected[col] = cat with tm.ensure_clean() as path: original.to_stata(path, version=version) written_and_read_again = self.read_dta(path) res = written_and_read_again.set_index("index") - tm.assert_frame_equal(res, expected, check_categorical=False) + tm.assert_frame_equal(res, expected) def test_categorical_warnings_and_errors(self): # Warning for non-string labels @@ -1056,9 +1073,11 @@ def test_categorical_sorting(self, file): parsed.index = np.arange(parsed.shape[0]) codes = [-1, -1, 0, 1, 1, 1, 2, 2, 3, 4] categories = ["Poor", "Fair", "Good", "Very good", "Excellent"] - cat = pd.Categorical.from_codes(codes=codes, categories=categories) + cat = pd.Categorical.from_codes( + codes=codes, categories=categories, ordered=True + ) expected = pd.Series(cat, name="srh") - tm.assert_series_equal(expected, parsed["srh"], check_categorical=False) + tm.assert_series_equal(expected, parsed["srh"]) @pytest.mark.parametrize("file", ["dta19_115", "dta19_117"]) def test_categorical_ordering(self, file): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 1a794f8656abe..46ac430a13394 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -393,7 +393,7 @@ def test_constructor_categorical_dtype(self): expected = Series( ["a", "a"], index=[0, 1], dtype=CategoricalDtype(["a", "b"], ordered=True) ) - tm.assert_series_equal(result, expected, check_categorical=True) + tm.assert_series_equal(result, expected) def test_constructor_categorical_string(self): # GH 26336: the string 'category' maintains existing CategoricalDtype From 8c38283cbb9c6a9ba6fa91d6e23e44133f0421d2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 19:29:34 -0700 Subject: [PATCH 308/388] CLN: avoid values_from_object in Series (#32426) --- pandas/core/frame.py | 12 +++++-- pandas/core/nanops.py | 49 +++++++++++++++------------- pandas/core/series.py | 6 ++-- pandas/tests/frame/test_analytics.py | 5 --- 4 files changed, 39 insertions(+), 33 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 60fc69e8222d6..efb922fa76094 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -77,6 +77,7 @@ ensure_platform_int, infer_dtype_from_object, is_bool_dtype, + is_datetime64_any_dtype, is_dict_like, is_dtype_equal, is_extension_array_dtype, @@ -88,6 +89,7 @@ is_list_like, is_named_tuple, is_object_dtype, + is_period_dtype, is_scalar, is_sequence, needs_i8_conversion, @@ -7789,11 +7791,13 @@ def _reduce( self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds ): - dtype_is_dt = self.dtypes.apply(lambda x: x.kind == "M") + dtype_is_dt = self.dtypes.apply( + lambda x: 
is_datetime64_any_dtype(x) or is_period_dtype(x) + ) if numeric_only is None and name in ["mean", "median"] and dtype_is_dt.any(): warnings.warn( "DataFrame.mean and DataFrame.median with numeric_only=None " - "will include datetime64 and datetime64tz columns in a " + "will include datetime64, datetime64tz, and PeriodDtype columns in a " "future version.", FutureWarning, stacklevel=3, @@ -7854,6 +7858,10 @@ def blk_func(values): assert len(res) == max(list(res.keys())) + 1, res.keys() out = df._constructor_sliced(res, index=range(len(res)), dtype=out_dtype) out.index = df.columns + if axis == 0 and df.dtypes.apply(needs_i8_conversion).any(): + # FIXME: needs_i8_conversion check is kludge, not sure + # why it is necessary in this case and this case alone + out[:] = coerce_to_dtypes(out.values, df.dtypes) return out if numeric_only is None: diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 78313f5c3bbbf..269843abb15ee 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -7,7 +7,7 @@ from pandas._config import get_option -from pandas._libs import NaT, Timedelta, Timestamp, iNaT, lib +from pandas._libs import NaT, Period, Timedelta, Timestamp, iNaT, lib from pandas._typing import Dtype, Scalar from pandas.compat._optional import import_optional_dependency @@ -17,9 +17,7 @@ is_any_int_dtype, is_bool_dtype, is_complex, - is_datetime64_dtype, - is_datetime64tz_dtype, - is_datetime_or_timedelta_dtype, + is_datetime64_any_dtype, is_float, is_float_dtype, is_integer, @@ -28,8 +26,10 @@ is_object_dtype, is_scalar, is_timedelta64_dtype, + needs_i8_conversion, pandas_dtype, ) +from pandas.core.dtypes.dtypes import PeriodDtype from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna from pandas.core.construction import extract_array @@ -134,10 +134,8 @@ def f( def _bn_ok_dtype(dtype: Dtype, name: str) -> bool: - # Bottleneck chokes on datetime64 - if not is_object_dtype(dtype) and not ( - is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype) - ): + # Bottleneck chokes on datetime64, PeriodDtype (or and EA) + if not is_object_dtype(dtype) and not needs_i8_conversion(dtype): # GH 15507 # bottleneck does not properly upcast during the sum @@ -283,17 +281,16 @@ def _get_values( # with scalar fill_value. This guarantee is important for the # maybe_upcast_putmask call below assert is_scalar(fill_value) + values = extract_array(values, extract_numpy=True) mask = _maybe_get_mask(values, skipna, mask) - values = extract_array(values, extract_numpy=True) dtype = values.dtype - if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values): + if needs_i8_conversion(values): # changing timedelta64/datetime64 to int64 needs to happen after # finding `mask` above - values = getattr(values, "asi8", values) - values = values.view(np.int64) + values = np.asarray(values.view("i8")) dtype_ok = _na_ok_dtype(dtype) @@ -307,7 +304,8 @@ def _get_values( if skipna and copy: values = values.copy() - if dtype_ok: + assert mask is not None # for mypy + if dtype_ok and mask.any(): np.putmask(values, mask, fill_value) # promote if needed @@ -325,13 +323,14 @@ def _get_values( def _na_ok_dtype(dtype) -> bool: - # TODO: what about datetime64tz? PeriodDtype? 
- return not issubclass(dtype.type, (np.integer, np.timedelta64, np.datetime64)) + if needs_i8_conversion(dtype): + return False + return not issubclass(dtype.type, np.integer) def _wrap_results(result, dtype: Dtype, fill_value=None): """ wrap our results if needed """ - if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): + if is_datetime64_any_dtype(dtype): if fill_value is None: # GH#24293 fill_value = iNaT @@ -342,7 +341,8 @@ def _wrap_results(result, dtype: Dtype, fill_value=None): result = np.nan result = Timestamp(result, tz=tz) else: - result = result.view(dtype) + # If we have float dtype, taking a view will give the wrong result + result = result.astype(dtype) elif is_timedelta64_dtype(dtype): if not isinstance(result, np.ndarray): if result == fill_value: @@ -356,6 +356,14 @@ def _wrap_results(result, dtype: Dtype, fill_value=None): else: result = result.astype("m8[ns]").view(dtype) + elif isinstance(dtype, PeriodDtype): + if is_float(result) and result.is_integer(): + result = int(result) + if is_integer(result): + result = Period._from_ordinal(result, freq=dtype.freq) + else: + raise NotImplementedError(type(result), result) + return result @@ -542,12 +550,7 @@ def nanmean(values, axis=None, skipna=True, mask=None): ) dtype_sum = dtype_max dtype_count = np.float64 - if ( - is_integer_dtype(dtype) - or is_timedelta64_dtype(dtype) - or is_datetime64_dtype(dtype) - or is_datetime64tz_dtype(dtype) - ): + if is_integer_dtype(dtype) or needs_i8_conversion(dtype): dtype_sum = np.float64 elif is_float_dtype(dtype): dtype_sum = dtype diff --git a/pandas/core/series.py b/pandas/core/series.py index 96177da1698a5..40afd5dcffb96 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1985,7 +1985,7 @@ def idxmin(self, axis=0, skipna=True, *args, **kwargs): nan """ skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs) - i = nanops.nanargmin(com.values_from_object(self), skipna=skipna) + i = nanops.nanargmin(self._values, skipna=skipna) if i == -1: return np.nan return self.index[i] @@ -2056,7 +2056,7 @@ def idxmax(self, axis=0, skipna=True, *args, **kwargs): nan """ skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs) - i = nanops.nanargmax(com.values_from_object(self), skipna=skipna) + i = nanops.nanargmax(self._values, skipna=skipna) if i == -1: return np.nan return self.index[i] @@ -2094,7 +2094,7 @@ def round(self, decimals=0, *args, **kwargs) -> "Series": dtype: float64 """ nv.validate_round(args, kwargs) - result = com.values_from_object(self).round(decimals) + result = self._values.round(decimals) result = self._constructor(result, index=self.index).__finalize__(self) return result diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index f4a10abea9757..82f8a102f9c64 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -856,11 +856,6 @@ def test_mean_datetimelike(self): expected = pd.Series({"A": 1.0, "C": df.loc[1, "C"]}) tm.assert_series_equal(result, expected) - @pytest.mark.xfail( - reason="casts to object-dtype and then tries to add timestamps", - raises=TypeError, - strict=True, - ) def test_mean_datetimelike_numeric_only_false(self): df = pd.DataFrame( { From b8ee1e1ed4910152967af27338a062ce7e4a68e9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 19:32:10 -0700 Subject: [PATCH 309/388] TYP: enforce annotation on SingleBlockManager.__init__ (#32421) --- pandas/core/internals/managers.py | 95 ++++++++++--------- pandas/core/series.py | 4 +- 
pandas/tests/extension/test_external_block.py | 5 +- 3 files changed, 56 insertions(+), 48 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index b21b3d1b475d9..88e6eab7f2fc2 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -3,12 +3,12 @@ import itertools import operator import re -from typing import Dict, List, Optional, Sequence, Tuple, Union +from typing import Dict, List, Optional, Sequence, Tuple, TypeVar, Union import numpy as np from pandas._libs import Timedelta, Timestamp, internals as libinternals, lib -from pandas._typing import DtypeObj, Label +from pandas._typing import ArrayLike, DtypeObj, Label from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -58,6 +58,8 @@ # TODO: flexible with index=None and/or items=None +T = TypeVar("T", bound="BlockManager") + class BlockManager(PandasObject): """ @@ -149,6 +151,13 @@ def __init__( self._blknos = None self._blklocs = None + @classmethod + def from_blocks(cls, blocks: List[Block], axes: List[Index]): + """ + Constructor for BlockManager and SingleBlockManager with same signature. + """ + return cls(blocks, axes, do_integrity_check=False) + @property def blknos(self): """ @@ -176,7 +185,7 @@ def blklocs(self): return self._blklocs - def make_empty(self, axes=None) -> "BlockManager": + def make_empty(self: T, axes=None) -> T: """ return an empty BlockManager with the items axis of len 0 """ if axes is None: axes = [Index([])] + self.axes[1:] @@ -184,10 +193,11 @@ def make_empty(self, axes=None) -> "BlockManager": # preserve dtype if possible if self.ndim == 1: assert isinstance(self, SingleBlockManager) # for mypy - blocks = np.array([], dtype=self.array_dtype) + arr = np.array([], dtype=self.array_dtype) + blocks = [make_block(arr, placement=slice(0, 0), ndim=1)] else: blocks = [] - return type(self)(blocks, axes) + return type(self).from_blocks(blocks, axes) def __nonzero__(self) -> bool: return True @@ -380,7 +390,7 @@ def reduce(self, func, *args, **kwargs): return res - def apply(self, f, filter=None, **kwargs) -> "BlockManager": + def apply(self: T, f, filter=None, **kwargs) -> T: """ Iterate over the blocks, collect and create a new BlockManager. 
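A brief aside on the typing pattern this commit introduces: binding a TypeVar to the base class (``T = TypeVar("T", bound="BlockManager")`` above) and annotating ``self`` with it lets methods such as ``make_empty`` and ``apply`` promise to return the caller's own class, so one signature covers both ``BlockManager`` and ``SingleBlockManager``. A minimal standalone sketch of the idiom (``Base`` and ``Sub`` are made-up names, not pandas classes):

    from typing import TypeVar

    T = TypeVar("T", bound="Base")

    class Base:
        def make_empty(self: T) -> T:
            # type checkers resolve T to the concrete class of the caller
            return type(self)()

    class Sub(Base):
        pass

    obj = Sub().make_empty()  # inferred as Sub, not Base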
@@ -458,8 +468,8 @@ def apply(self, f, filter=None, **kwargs) -> "BlockManager": if len(result_blocks) == 0: return self.make_empty(self.axes) - bm = type(self)(result_blocks, self.axes, do_integrity_check=False) - return bm + + return type(self).from_blocks(result_blocks, self.axes) def quantile( self, @@ -658,7 +668,7 @@ def comp(s, regex=False): rb = new_rb result_blocks.extend(rb) - bm = type(self)(result_blocks, self.axes) + bm = type(self).from_blocks(result_blocks, self.axes) bm._consolidate_inplace() return bm @@ -747,7 +757,7 @@ def combine(self, blocks: List[Block], copy: bool = True) -> "BlockManager": axes = list(self.axes) axes[0] = self.items.take(indexer) - return type(self)(new_blocks, axes, do_integrity_check=False) + return type(self).from_blocks(new_blocks, axes) def get_slice(self, slobj: slice, axis: int = 0) -> "BlockManager": @@ -774,7 +784,7 @@ def __contains__(self, item) -> bool: def nblocks(self) -> int: return len(self.blocks) - def copy(self, deep=True) -> "BlockManager": + def copy(self: T, deep=True) -> T: """ Make deep or shallow copy of BlockManager @@ -1244,14 +1254,14 @@ def reindex_axis( ) def reindex_indexer( - self, + self: T, new_axis, indexer, axis: int, fill_value=None, - allow_dups=False, + allow_dups: bool = False, copy: bool = True, - ): + ) -> T: """ Parameters ---------- @@ -1299,7 +1309,8 @@ def reindex_indexer( new_axes = list(self.axes) new_axes[axis] = new_axis - return type(self)(new_blocks, new_axes) + + return type(self).from_blocks(new_blocks, new_axes) def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): """ @@ -1500,6 +1511,8 @@ def __init__( do_integrity_check: bool = False, fastpath: bool = False, ): + assert isinstance(block, Block), type(block) + if isinstance(axis, list): if len(axis) != 1: raise ValueError( @@ -1510,38 +1523,29 @@ def __init__( # passed from constructor, single block, single axis if fastpath: self.axes = [axis] - if isinstance(block, list): - - # empty block - if len(block) == 0: - block = [np.array([])] - elif len(block) != 1: - raise ValueError( - "Cannot create SingleBlockManager with more than 1 block" - ) - block = block[0] else: self.axes = [ensure_index(axis)] - # create the block here - if isinstance(block, list): - - # provide consolidation to the interleaved_dtype - if len(block) > 1: - dtype = _interleaved_dtype(block) - block = [b.astype(dtype) for b in block] - block = _consolidate(block) - - if len(block) != 1: - raise ValueError( - "Cannot create SingleBlockManager with more than 1 block" - ) - block = block[0] + self.blocks = tuple([block]) - if not isinstance(block, Block): - block = make_block(block, placement=slice(0, len(axis)), ndim=1) + @classmethod + def from_blocks( + cls, blocks: List[Block], axes: List[Index] + ) -> "SingleBlockManager": + """ + Constructor for BlockManager and SingleBlockManager with same signature. + """ + assert len(blocks) == 1 + assert len(axes) == 1 + return cls(blocks[0], axes[0], do_integrity_check=False, fastpath=True) - self.blocks = tuple([block]) + @classmethod + def from_array(cls, array: ArrayLike, index: Index) -> "SingleBlockManager": + """ + Constructor for if we have an array that is not yet a Block. 
+ """ + block = make_block(array, placement=slice(0, len(index)), ndim=1) + return cls(block, index, fastpath=True) def _post_setstate(self): pass @@ -1568,7 +1572,10 @@ def get_slice(self, slobj: slice, axis: int = 0) -> "SingleBlockManager": if axis >= self.ndim: raise IndexError("Requested axis not found in manager") - return type(self)(self._block._slice(slobj), self.index[slobj], fastpath=True) + blk = self._block + array = blk._slice(slobj) + block = blk.make_block_same_class(array, placement=range(len(array))) + return type(self)(block, self.index[slobj], fastpath=True) @property def index(self) -> Index: @@ -1626,7 +1633,7 @@ def fast_xs(self, loc): """ raise NotImplementedError("Use series._values[loc] instead") - def concat(self, to_concat, new_axis) -> "SingleBlockManager": + def concat(self, to_concat, new_axis: Index) -> "SingleBlockManager": """ Concatenate a list of SingleBlockManagers into a single SingleBlockManager. diff --git a/pandas/core/series.py b/pandas/core/series.py index 40afd5dcffb96..2d8eb9b29498a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -205,7 +205,7 @@ def __init__( # data is an ndarray, index is defined if not isinstance(data, SingleBlockManager): - data = SingleBlockManager(data, index, fastpath=True) + data = SingleBlockManager.from_array(data, index) if copy: data = data.copy() if index is None: @@ -317,7 +317,7 @@ def __init__( else: data = sanitize_array(data, index, dtype, copy, raise_cast_failure=True) - data = SingleBlockManager(data, index, fastpath=True) + data = SingleBlockManager.from_array(data, index) generic.NDFrame.__init__(self, data) self.name = name diff --git a/pandas/tests/extension/test_external_block.py b/pandas/tests/extension/test_external_block.py index 6311070cfe2bb..8a8dac54cf96a 100644 --- a/pandas/tests/extension/test_external_block.py +++ b/pandas/tests/extension/test_external_block.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -from pandas.core.internals import BlockManager +from pandas.core.internals import BlockManager, SingleBlockManager from pandas.core.internals.blocks import Block, NonConsolidatableMixIn @@ -36,7 +36,8 @@ def test_concat_series(): # GH17728 values = np.arange(3, dtype="int64") block = CustomBlock(values, placement=slice(0, 3)) - s = pd.Series(block, pd.RangeIndex(3), fastpath=True) + mgr = SingleBlockManager(block, pd.RangeIndex(3)) + s = pd.Series(mgr, pd.RangeIndex(3), fastpath=True) res = pd.concat([s, s]) assert isinstance(res._data.blocks[0], CustomBlock) From 03b1f19a698d7b5497ad2b62dc048acdcb9dfb02 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 19:32:40 -0700 Subject: [PATCH 310/388] CLN: remove unnecessary values_from_objects in groupby.ops (#32547) --- pandas/core/groupby/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 7259268ac3f2b..577c874c9cbbe 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -217,7 +217,7 @@ def indices(self): return self.groupings[0].indices else: codes_list = [ping.codes for ping in self.groupings] - keys = [com.values_from_object(ping.group_index) for ping in self.groupings] + keys = [ping.group_index for ping in self.groupings] return get_indexer_dict(codes_list, keys) @property From 4b99525eeaeb101d8320d1d29618667660efe60e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 19:33:13 -0700 Subject: [PATCH 311/388] CLN: values_from_object in computation.pytables (#32557) --- 
pandas/core/computation/pytables.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 653d014775386..15d9987310f18 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -17,6 +17,7 @@ from pandas.core.computation.common import _ensure_decoded from pandas.core.computation.expr import BaseExprVisitor from pandas.core.computation.ops import UndefinedVariableError, is_term +from pandas.core.construction import extract_array from pandas.io.formats.printing import pprint_thing, pprint_thing_encoded @@ -202,7 +203,7 @@ def stringify(value): v = Timedelta(v, unit="s").value return TermValue(int(v), v, kind) elif meta == "category": - metadata = com.values_from_object(self.metadata) + metadata = extract_array(self.metadata, extract_numpy=True) result = metadata.searchsorted(v, side="left") # result returns 0 if v is first element or if v is not in metadata From 33f67d98aa74bdfdc4237930229b3dbe06d4fca7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 19:35:21 -0700 Subject: [PATCH 312/388] BUG: iloc.__setitem__ with duplicate columns (#32477) --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/core/frame.py | 14 ++++++++++ pandas/core/generic.py | 4 +++ pandas/core/indexing.py | 44 ++++++++++++++++++------------ pandas/core/internals/managers.py | 7 +++-- pandas/tests/indexing/test_iloc.py | 30 +++++++++++++++++++- 6 files changed, 80 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index bd55a25b22e0d..626e42671c1a7 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -263,7 +263,7 @@ Indexing - Bug in :meth:`Series.xs` incorrectly returning ``Timestamp`` instead of ``datetime64`` in some object-dtype cases (:issue:`31630`) - Bug in :meth:`DataFrame.iat` incorrectly returning ``Timestamp`` instead of ``datetime`` in some object-dtype cases (:issue:`32809`) - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` when indexing with an integer key on a object-dtype :class:`Index` that is not all-integers (:issue:`31905`) -- +- Bug in :meth:`DataFrame.iloc.__setitem__` on a :class:`DataFrame` with duplicate columns incorrectly setting values for all matching columns (:issue:`15686`, :issue:`22036`) Missing ^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index efb922fa76094..da6a6e0a16393 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2708,6 +2708,20 @@ def _setitem_frame(self, key, value): self._check_setitem_copy() self._where(-key, value, inplace=True) + def _iset_item(self, loc: int, value): + self._ensure_valid_index(value) + + # technically _sanitize_column expects a label, not a position, + # but the behavior is the same as long as we pass broadcast=False + value = self._sanitize_column(loc, value, broadcast=False) + NDFrame._iset_item(self, loc, value) + + # check if we are modifying a copy + # try to set first as we want an invalid + # value exception to occur first + if len(self): + self._check_setitem_copy() + def _set_item(self, key, value): """ Add series to DataFrame in specified column. 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index add2215645f2f..d5f5e39911ec2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3579,6 +3579,10 @@ def _slice(self: FrameOrSeries, slobj: slice, axis=0) -> FrameOrSeries: result._set_is_copy(self, copy=is_copy) return result + def _iset_item(self, loc: int, value) -> None: + self._data.iset(loc, value) + self._clear_item_cache() + def _set_item(self, key, value) -> None: self._data.set(key, value) self._clear_item_cache() diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 9a671c7fc170a..c9362a0527c06 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1615,6 +1615,12 @@ def _setitem_with_indexer(self, indexer, value): info_idx = [info_idx] labels = item_labels[info_idx] + # Ensure we have something we can iterate over + ilocs = info_idx + if isinstance(info_idx, slice): + ri = Index(range(len(self.obj.columns))) + ilocs = ri[info_idx] + plane_indexer = indexer[:1] lplane_indexer = length_of_indexer(plane_indexer[0], self.obj.index) # lplane_indexer gives the expected length of obj[indexer[0]] @@ -1632,9 +1638,11 @@ def _setitem_with_indexer(self, indexer, value): "length than the value" ) - def setter(item, v): - ser = self.obj[item] - pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer + pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer + + def isetter(loc, v): + # positional setting on column loc + ser = self.obj._ixs(loc, axis=1) # perform the equivalent of a setitem on the info axis # as we have a null slice or a slice with full bounds @@ -1654,7 +1662,7 @@ def setter(item, v): ser._maybe_update_cacher(clear=True) # reset the sliced object if unique - self.obj[item] = ser + self.obj._iset_item(loc, ser) # we need an iterable, with a ndim of at least 1 # eg. don't pass through np.array(0) @@ -1664,8 +1672,10 @@ def setter(item, v): if isinstance(value, ABCDataFrame): sub_indexer = list(indexer) multiindex_indexer = isinstance(labels, ABCMultiIndex) + # TODO: we are implicitly assuming value.columns is unique - for item in labels: + for loc in ilocs: + item = item_labels[loc] if item in value: sub_indexer[info_axis] = item v = self._align_series( @@ -1674,7 +1684,7 @@ def setter(item, v): else: v = np.nan - setter(item, v) + isetter(loc, v) # we have an equal len ndarray/convertible to our labels # hasattr first, to avoid coercing to ndarray without reason. 
@@ -1685,16 +1695,15 @@ def setter(item, v): # note that this coerces the dtype if we are mixed # GH 7551 value = np.array(value, dtype=object) - if len(labels) != value.shape[1]: + if len(ilocs) != value.shape[1]: raise ValueError( "Must have equal len keys and value " "when setting with an ndarray" ) - for i, item in enumerate(labels): - + for i, loc in enumerate(ilocs): # setting with a list, re-coerces - setter(item, value[:, i].tolist()) + isetter(loc, value[:, i].tolist()) elif ( len(labels) == 1 @@ -1702,7 +1711,8 @@ def setter(item, v): and not is_scalar(plane_indexer[0]) ): # we have an equal len list/ndarray - setter(labels[0], value) + # We only get here with len(labels) == len(ilocs) == 1 + isetter(ilocs[0], value) elif lplane_indexer == 0 and len(value) == len(self.obj.index): # We get here in one case via .loc with a all-False mask @@ -1710,19 +1720,19 @@ def setter(item, v): else: # per-label values - if len(labels) != len(value): + if len(ilocs) != len(value): raise ValueError( "Must have equal len keys and value " "when setting with an iterable" ) - for item, v in zip(labels, value): - setter(item, v) + for loc, v in zip(ilocs, value): + isetter(loc, v) else: - # scalar - for item in labels: - setter(item, value) + # scalar value + for loc in ilocs: + isetter(loc, value) else: if isinstance(indexer, tuple): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 88e6eab7f2fc2..5687cb27728e8 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1096,7 +1096,10 @@ def value_getitem(placement): "Shape of new values must be compatible with manager shape" ) - if isinstance(loc, int): + if lib.is_integer(loc): + # We have 6 tests where loc is _not_ an int. + # In this case, get_blkno_placements will yield only one tuple, + # containing (self._blknos[loc], BlockPlacement(slice(0, 1, 1))) loc = [loc] # Accessing public blknos ensures the public versions are initialized @@ -1148,7 +1151,7 @@ def value_getitem(placement): # one item. 
new_blocks.extend( make_block( - values=value.copy(), + values=value, ndim=self.ndim, placement=slice(mgr_loc, mgr_loc + 1), ) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 683d4f2605712..f6b9e9a44ba14 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -349,7 +349,6 @@ def test_iloc_setitem_dups(self): df = concat([df1, df2], axis=1) expected = df.fillna(3) - expected["A"] = expected["A"].astype("float64") inds = np.isnan(df.iloc[:, 0]) mask = inds[inds].index df.iloc[mask, 0] = df.iloc[mask, 2] @@ -694,3 +693,32 @@ def test_series_indexing_zerodim_np_array(self): s = Series([1, 2]) result = s.iloc[np.array(0)] assert result == 1 + + +class TestILocSetItemDuplicateColumns: + def test_iloc_setitem_scalar_duplicate_columns(self): + # GH#15686, duplicate columns and mixed dtype + df1 = pd.DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}]) + df2 = pd.DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}]) + df = pd.concat([df1, df2], axis=1) + df.iloc[0, 0] = -1 + + assert df.iloc[0, 0] == -1 + assert df.iloc[0, 2] == 3 + assert df.dtypes.iloc[2] == np.int64 + + def test_iloc_setitem_list_duplicate_columns(self): + # GH#22036 setting with same-sized list + df = pd.DataFrame([[0, "str", "str2"]], columns=["a", "b", "b"]) + + df.iloc[:, 2] = ["str3"] + + expected = pd.DataFrame([[0, "str", "str3"]], columns=["a", "b", "b"]) + tm.assert_frame_equal(df, expected) + + def test_iloc_setitem_series_duplicate_columns(self): + df = pd.DataFrame( + np.arange(8, dtype=np.int64).reshape(2, 4), columns=["A", "B", "A", "B"] + ) + df.iloc[:, 0] = df.iloc[:, 0].astype(np.float64) + assert df.dtypes.iloc[2] == np.int64 From a0972f4e9bb114c78853aebe628fecfb9dcacf78 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 19:39:18 -0700 Subject: [PATCH 313/388] CLN: avoid values_from_object in Index.equals (#32505) --- pandas/core/indexes/base.py | 15 +++++++-------- pandas/core/indexes/multi.py | 7 +++---- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 93ed301566ddc..2d5403eeda940 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4120,7 +4120,6 @@ def __getitem__(self, key): if com.is_bool_indexer(key): key = np.asarray(key, dtype=bool) - key = com.values_from_object(key) result = getitem(key) if not is_scalar(result): if np.ndim(result) > 1: @@ -4245,19 +4244,19 @@ def equals(self, other) -> bool: if not isinstance(other, Index): return False - if is_object_dtype(self) and not is_object_dtype(other): + if is_object_dtype(self.dtype) and not is_object_dtype(other.dtype): # if other is not object, use other's logic for coercion return other.equals(self) if isinstance(other, ABCMultiIndex): # d-level MultiIndex can equal d-tuple Index - if not is_object_dtype(self.dtype): - if self.nlevels != other.nlevels: - return False + return other.equals(self) - return array_equivalent( - com.values_from_object(self), com.values_from_object(other) - ) + if is_extension_array_dtype(other.dtype): + # All EA-backed Index subclasses override equals + return other.equals(self) + + return array_equivalent(self._values, other._values) def identical(self, other) -> bool: """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index c22b289bb4017..0bb88145646ed 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3141,11 +3141,10 @@ def equals(self, other) -> bool: if not isinstance(other, 
MultiIndex): # d-level MultiIndex can equal d-tuple Index if not is_object_dtype(other.dtype): - if self.nlevels != other.nlevels: - return False + # other cannot contain tuples, so cannot match self + return False - other_vals = com.values_from_object(other) - return array_equivalent(self._values, other_vals) + return array_equivalent(self._values, other._values) if self.nlevels != other.nlevels: return False From 4867415a465943ec73f04534dcc10c23ea802420 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 19:40:13 -0700 Subject: [PATCH 314/388] REF: Remove BlockManager.rename_axis (#32349) --- pandas/core/generic.py | 6 ++--- pandas/core/indexes/base.py | 21 ++++++++++++++++ pandas/core/internals/__init__.py | 2 -- pandas/core/internals/managers.py | 41 +------------------------------ pandas/core/reshape/merge.py | 4 +-- 5 files changed, 26 insertions(+), 48 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d5f5e39911ec2..f53135174741e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -967,7 +967,6 @@ def rename( continue ax = self._get_axis(axis_no) - baxis = self._get_block_manager_axis(axis_no) f = com.get_rename_function(replacements) if level is not None: @@ -984,9 +983,8 @@ def rename( ] raise KeyError(f"{missing_labels} not found in axis") - result._data = result._data.rename_axis( - f, axis=baxis, copy=copy, level=level - ) + new_index = ax._transform_index(f, level) + result.set_axis(new_index, axis=axis_no, inplace=True) result._clear_item_cache() if inplace: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2d5403eeda940..a5f133fb10d10 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4753,6 +4753,27 @@ def map(self, mapper, na_action=None): return Index(new_values, **attributes) + # TODO: De-duplicate with map, xref GH#32349 + def _transform_index(self, func, level=None) -> "Index": + """ + Apply function to all values found in index. + + This includes transforming multiindex entries separately. + Only apply function to one level of the MultiIndex if level is specified. + """ + if isinstance(self, ABCMultiIndex): + if level is not None: + items = [ + tuple(func(y) if i == level else y for i, y in enumerate(x)) + for x in self + ] + else: + items = [tuple(func(y) for y in x) for x in self] + return type(self).from_tuples(items, names=self.names) + else: + items = [func(x) for x in self] + return Index(items, name=self.name, tupleize_cols=False) + def isin(self, values, level=None): """ Return a boolean array where the index values are in `values`. 
diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index 37a3405554745..e70652b81c42f 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -17,7 +17,6 @@ from pandas.core.internals.managers import ( BlockManager, SingleBlockManager, - _transform_index, concatenate_block_managers, create_block_manager_from_arrays, create_block_manager_from_blocks, @@ -40,7 +39,6 @@ "_block_shape", "BlockManager", "SingleBlockManager", - "_transform_index", "concatenate_block_managers", "create_block_manager_from_arrays", "create_block_manager_from_blocks", diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5687cb27728e8..0bba8de6682ea 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -34,7 +34,7 @@ from pandas.core.arrays.sparse import SparseDtype from pandas.core.base import PandasObject from pandas.core.indexers import maybe_convert_indices -from pandas.core.indexes.api import Index, MultiIndex, ensure_index +from pandas.core.indexes.api import Index, ensure_index from pandas.core.internals.blocks import ( Block, CategoricalBlock, @@ -226,23 +226,6 @@ def set_axis(self, axis: int, new_labels: Index) -> None: self.axes[axis] = new_labels - def rename_axis( - self, mapper, axis: int, copy: bool = True, level=None - ) -> "BlockManager": - """ - Rename one of axes. - - Parameters - ---------- - mapper : unary callable - axis : int - copy : bool, default True - level : int or None, default None - """ - obj = self.copy(deep=copy) - obj.set_axis(axis, _transform_index(self.axes[axis], mapper, level)) - return obj - @property def _is_single_block(self) -> bool: if self.ndim == 1: @@ -1972,28 +1955,6 @@ def _compare_or_regex_search(a, b, regex=False): return result -def _transform_index(index, func, level=None): - """ - Apply function to all values found in index. - - This includes transforming multiindex entries separately. - Only apply function to one level of the MultiIndex if level is specified. 
- - """ - if isinstance(index, MultiIndex): - if level is not None: - items = [ - tuple(func(y) if i == level else y for i, y in enumerate(x)) - for x in index - ] - else: - items = [tuple(func(y) for y in x) for x in index] - return MultiIndex.from_tuples(items, names=index.names) - else: - items = [func(x) for x in index] - return Index(items, name=index.name, tupleize_cols=False) - - def _fast_count_smallints(arr): """Faster version of set(arr) for sequences of small numbers.""" counts = np.bincount(arr.astype(np.int_)) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 08c04cb49f125..e75dced21f488 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -46,7 +46,7 @@ from pandas.core.arrays.categorical import _recode_for_categories import pandas.core.common as com from pandas.core.frame import _merge_doc -from pandas.core.internals import _transform_index, concatenate_block_managers +from pandas.core.internals import concatenate_block_managers from pandas.core.sorting import is_int64_overflow_possible if TYPE_CHECKING: @@ -1993,4 +1993,4 @@ def renamer(x, suffix): lrenamer = partial(renamer, suffix=lsuffix) rrenamer = partial(renamer, suffix=rsuffix) - return (_transform_index(left, lrenamer), _transform_index(right, rrenamer)) + return (left._transform_index(lrenamer), right._transform_index(rrenamer)) From 2579484cdd85b8674118bc7ccc02af32c861cbb7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 19:52:00 -0700 Subject: [PATCH 315/388] CLN: simplify get_values usage in groupby (#32523) --- pandas/core/groupby/generic.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ac522fc7863b2..b7ac3048631c5 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -49,7 +49,7 @@ is_scalar, needs_i8_conversion, ) -from pandas.core.dtypes.missing import _isna_ndarraylike, isna, notna +from pandas.core.dtypes.missing import isna, notna from pandas.core.aggregation import ( is_multi_agg_with_relabel, @@ -1772,10 +1772,8 @@ def count(self): ids, _, ngroups = self.grouper.group_info mask = ids != -1 - vals = ( - (mask & ~_isna_ndarraylike(np.atleast_2d(blk.get_values()))) - for blk in data.blocks - ) + # TODO(2DEA): reshape would not be necessary with 2D EAs + vals = ((mask & ~isna(blk.values).reshape(blk.shape)) for blk in data.blocks) locs = (blk.mgr_locs for blk in data.blocks) counted = ( From 3984df125b7ec87e7bf89f6e7a2e75c821168139 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 19:52:54 -0700 Subject: [PATCH 316/388] CLN: avoid Block.get_values in io.sql (#32524) --- pandas/io/sql.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 9a53e7cd241e1..560e7e4781cbb 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -705,8 +705,16 @@ def insert_data(self): else: # convert to microsecond resolution for datetime.datetime d = b.values.astype("M8[us]").astype(object) + elif b.is_timedelta: + # numpy converts this to an object array of integers, + # whereas b.astype(object).values would convert to + # object array of Timedeltas + d = b.values.astype(object) else: - d = np.array(b.get_values(), dtype=object) + # TODO(2DEA): astype-first can be avoided with 2D EAs + # astype on the block instead of values to ensure we + # get the right shape + d = b.astype(object).values # replace NaN with None if b._can_hold_na: From 
eac5cd1149925078420548231b459e0c45bb0c9a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 19:55:23 -0700 Subject: [PATCH 317/388] TST: fixturize skipna in test_nanops (#32607) --- pandas/tests/test_nanops.py | 152 ++++++++++++++++++------------------ 1 file changed, 75 insertions(+), 77 deletions(-) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index f7e652eb78e2d..852e1ce489893 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -19,6 +19,14 @@ has_c16 = hasattr(np, "complex128") +@pytest.fixture(params=[True, False]) +def skipna(request): + """ + Fixture to pass skipna to nanops functions. + """ + return request.param + + class TestnanopsDataFrame: def setup_method(self, method): np.random.seed(11235) @@ -89,28 +97,14 @@ def teardown_method(self, method): def check_results(self, targ, res, axis, check_dtype=True): res = getattr(res, "asm8", res) - res = getattr(res, "values", res) - - # timedeltas are a beast here - def _coerce_tds(targ, res): - if hasattr(targ, "dtype") and targ.dtype == "m8[ns]": - if len(targ) == 1: - targ = targ[0].item() - res = res.item() - else: - targ = targ.view("i8") - return targ, res - try: - if ( - axis != 0 - and hasattr(targ, "shape") - and targ.ndim - and targ.shape != res.shape - ): - res = np.split(res, [targ.shape[0]], axis=0)[0] - except (ValueError, IndexError): - targ, res = _coerce_tds(targ, res) + if ( + axis != 0 + and hasattr(targ, "shape") + and targ.ndim + and targ.shape != res.shape + ): + res = np.split(res, [targ.shape[0]], axis=0)[0] try: tm.assert_almost_equal(targ, res, check_dtype=check_dtype) @@ -118,9 +112,7 @@ def _coerce_tds(targ, res): # handle timedelta dtypes if hasattr(targ, "dtype") and targ.dtype == "m8[ns]": - targ, res = _coerce_tds(targ, res) - tm.assert_almost_equal(targ, res, check_dtype=check_dtype) - return + raise # There are sometimes rounding errors with # complex and object dtypes. 
@@ -149,29 +141,29 @@ def check_fun_data( targfunc, testarval, targarval, + skipna, check_dtype=True, empty_targfunc=None, **kwargs, ): for axis in list(range(targarval.ndim)) + [None]: - for skipna in [False, True]: - targartempval = targarval if skipna else testarval - if skipna and empty_targfunc and isna(targartempval).all(): - targ = empty_targfunc(targartempval, axis=axis, **kwargs) - else: - targ = targfunc(targartempval, axis=axis, **kwargs) + targartempval = targarval if skipna else testarval + if skipna and empty_targfunc and isna(targartempval).all(): + targ = empty_targfunc(targartempval, axis=axis, **kwargs) + else: + targ = targfunc(targartempval, axis=axis, **kwargs) - res = testfunc(testarval, axis=axis, skipna=skipna, **kwargs) + res = testfunc(testarval, axis=axis, skipna=skipna, **kwargs) + self.check_results(targ, res, axis, check_dtype=check_dtype) + if skipna: + res = testfunc(testarval, axis=axis, **kwargs) + self.check_results(targ, res, axis, check_dtype=check_dtype) + if axis is None: + res = testfunc(testarval, skipna=skipna, **kwargs) + self.check_results(targ, res, axis, check_dtype=check_dtype) + if skipna and axis is None: + res = testfunc(testarval, **kwargs) self.check_results(targ, res, axis, check_dtype=check_dtype) - if skipna: - res = testfunc(testarval, axis=axis, **kwargs) - self.check_results(targ, res, axis, check_dtype=check_dtype) - if axis is None: - res = testfunc(testarval, skipna=skipna, **kwargs) - self.check_results(targ, res, axis, check_dtype=check_dtype) - if skipna and axis is None: - res = testfunc(testarval, **kwargs) - self.check_results(targ, res, axis, check_dtype=check_dtype) if testarval.ndim <= 1: return @@ -184,12 +176,15 @@ def check_fun_data( targfunc, testarval2, targarval2, + skipna=skipna, check_dtype=check_dtype, empty_targfunc=empty_targfunc, **kwargs, ) - def check_fun(self, testfunc, targfunc, testar, empty_targfunc=None, **kwargs): + def check_fun( + self, testfunc, targfunc, testar, skipna, empty_targfunc=None, **kwargs + ): targar = testar if testar.endswith("_nan") and hasattr(self, testar[:-4]): @@ -202,6 +197,7 @@ def check_fun(self, testfunc, targfunc, testar, empty_targfunc=None, **kwargs): targfunc, testarval, targarval, + skipna=skipna, empty_targfunc=empty_targfunc, **kwargs, ) @@ -210,6 +206,7 @@ def check_funs( self, testfunc, targfunc, + skipna, allow_complex=True, allow_all_nan=True, allow_date=True, @@ -217,10 +214,10 @@ def check_funs( allow_obj=True, **kwargs, ): - self.check_fun(testfunc, targfunc, "arr_float", **kwargs) - self.check_fun(testfunc, targfunc, "arr_float_nan", **kwargs) - self.check_fun(testfunc, targfunc, "arr_int", **kwargs) - self.check_fun(testfunc, targfunc, "arr_bool", **kwargs) + self.check_fun(testfunc, targfunc, "arr_float", skipna, **kwargs) + self.check_fun(testfunc, targfunc, "arr_float_nan", skipna, **kwargs) + self.check_fun(testfunc, targfunc, "arr_int", skipna, **kwargs) + self.check_fun(testfunc, targfunc, "arr_bool", skipna, **kwargs) objs = [ self.arr_float.astype("O"), self.arr_int.astype("O"), @@ -228,18 +225,18 @@ def check_funs( ] if allow_all_nan: - self.check_fun(testfunc, targfunc, "arr_nan", **kwargs) + self.check_fun(testfunc, targfunc, "arr_nan", skipna, **kwargs) if allow_complex: - self.check_fun(testfunc, targfunc, "arr_complex", **kwargs) - self.check_fun(testfunc, targfunc, "arr_complex_nan", **kwargs) + self.check_fun(testfunc, targfunc, "arr_complex", skipna, **kwargs) + self.check_fun(testfunc, targfunc, "arr_complex_nan", skipna, **kwargs) if 
allow_all_nan: - self.check_fun(testfunc, targfunc, "arr_nan_nanj", **kwargs) + self.check_fun(testfunc, targfunc, "arr_nan_nanj", skipna, **kwargs) objs += [self.arr_complex.astype("O")] if allow_date: targfunc(self.arr_date) - self.check_fun(testfunc, targfunc, "arr_date", **kwargs) + self.check_fun(testfunc, targfunc, "arr_date", skipna, **kwargs) objs += [self.arr_date.astype("O")] if allow_tdelta: @@ -248,7 +245,7 @@ def check_funs( except TypeError: pass else: - self.check_fun(testfunc, targfunc, "arr_tdelta", **kwargs) + self.check_fun(testfunc, targfunc, "arr_tdelta", skipna, **kwargs) objs += [self.arr_tdelta.astype("O")] if allow_obj: @@ -260,7 +257,7 @@ def check_funs( targfunc = partial( self._badobj_wrap, func=targfunc, allow_complex=allow_complex ) - self.check_fun(testfunc, targfunc, "arr_obj", **kwargs) + self.check_fun(testfunc, targfunc, "arr_obj", skipna, **kwargs) def _badobj_wrap(self, value, func, allow_complex=True, **kwargs): if value.dtype.kind == "O": @@ -273,28 +270,22 @@ def _badobj_wrap(self, value, func, allow_complex=True, **kwargs): @pytest.mark.parametrize( "nan_op,np_op", [(nanops.nanany, np.any), (nanops.nanall, np.all)] ) - def test_nan_funcs(self, nan_op, np_op): - # TODO: allow tdelta, doesn't break tests - self.check_funs( - nan_op, np_op, allow_all_nan=False, allow_date=False, allow_tdelta=False - ) + def test_nan_funcs(self, nan_op, np_op, skipna): + self.check_funs(nan_op, np_op, skipna, allow_all_nan=False, allow_date=False) - def test_nansum(self): + def test_nansum(self, skipna): self.check_funs( nanops.nansum, np.sum, + skipna, allow_date=False, check_dtype=False, empty_targfunc=np.nansum, ) - def test_nanmean(self): + def test_nanmean(self, skipna): self.check_funs( - nanops.nanmean, - np.mean, - allow_complex=False, # TODO: allow this, doesn't break test - allow_obj=False, - allow_date=False, + nanops.nanmean, np.mean, skipna, allow_obj=False, allow_date=False, ) def test_nanmean_overflow(self): @@ -336,22 +327,24 @@ def test_returned_dtype(self, dtype): else: assert result.dtype == dtype - def test_nanmedian(self): + def test_nanmedian(self, skipna): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) self.check_funs( nanops.nanmedian, np.median, + skipna, allow_complex=False, allow_date=False, allow_obj="convert", ) @pytest.mark.parametrize("ddof", range(3)) - def test_nanvar(self, ddof): + def test_nanvar(self, ddof, skipna): self.check_funs( nanops.nanvar, np.var, + skipna, allow_complex=False, allow_date=False, allow_obj="convert", @@ -359,10 +352,11 @@ def test_nanvar(self, ddof): ) @pytest.mark.parametrize("ddof", range(3)) - def test_nanstd(self, ddof): + def test_nanstd(self, ddof, skipna): self.check_funs( nanops.nanstd, np.std, + skipna, allow_complex=False, allow_date=False, allow_obj="convert", @@ -371,13 +365,14 @@ def test_nanstd(self, ddof): @td.skip_if_no_scipy @pytest.mark.parametrize("ddof", range(3)) - def test_nansem(self, ddof): + def test_nansem(self, ddof, skipna): from scipy.stats import sem with np.errstate(invalid="ignore"): self.check_funs( nanops.nansem, sem, + skipna, allow_complex=False, allow_date=False, allow_tdelta=False, @@ -388,10 +383,10 @@ def test_nansem(self, ddof): @pytest.mark.parametrize( "nan_op,np_op", [(nanops.nanmin, np.min), (nanops.nanmax, np.max)] ) - def test_nanops_with_warnings(self, nan_op, np_op): + def test_nanops_with_warnings(self, nan_op, np_op, skipna): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) - 
self.check_funs(nan_op, np_op, allow_obj=False) + self.check_funs(nan_op, np_op, skipna, allow_obj=False) def _argminmax_wrap(self, value, axis=None, func=None): res = func(value, axis) @@ -408,17 +403,17 @@ def _argminmax_wrap(self, value, axis=None, func=None): res = -1 return res - def test_nanargmax(self): + def test_nanargmax(self, skipna): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) func = partial(self._argminmax_wrap, func=np.argmax) - self.check_funs(nanops.nanargmax, func, allow_obj=False) + self.check_funs(nanops.nanargmax, func, skipna, allow_obj=False) - def test_nanargmin(self): + def test_nanargmin(self, skipna): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) func = partial(self._argminmax_wrap, func=np.argmin) - self.check_funs(nanops.nanargmin, func, allow_obj=False) + self.check_funs(nanops.nanargmin, func, skipna, allow_obj=False) def _skew_kurt_wrap(self, values, axis=None, func=None): if not isinstance(values.dtype.type, np.floating): @@ -433,7 +428,7 @@ def _skew_kurt_wrap(self, values, axis=None, func=None): return result @td.skip_if_no_scipy - def test_nanskew(self): + def test_nanskew(self, skipna): from scipy.stats import skew func = partial(self._skew_kurt_wrap, func=skew) @@ -441,13 +436,14 @@ def test_nanskew(self): self.check_funs( nanops.nanskew, func, + skipna, allow_complex=False, allow_date=False, allow_tdelta=False, ) @td.skip_if_no_scipy - def test_nankurt(self): + def test_nankurt(self, skipna): from scipy.stats import kurtosis func1 = partial(kurtosis, fisher=True) @@ -456,15 +452,17 @@ def test_nankurt(self): self.check_funs( nanops.nankurt, func, + skipna, allow_complex=False, allow_date=False, allow_tdelta=False, ) - def test_nanprod(self): + def test_nanprod(self, skipna): self.check_funs( nanops.nanprod, np.prod, + skipna, allow_date=False, allow_tdelta=False, empty_targfunc=np.nanprod, From e8e02c0cbfe676e4c33382a7be920e6fdc511ffb Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 11 Mar 2020 03:06:02 +0000 Subject: [PATCH 318/388] PERF: copy cached attributes on index shallow_copy (#32568) --- doc/source/whatsnew/v1.1.0.rst | 4 +++- pandas/core/indexes/base.py | 10 +++++++--- pandas/core/indexes/numeric.py | 8 ++------ 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 626e42671c1a7..c473500c205d8 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -188,7 +188,9 @@ Performance improvements - Performance improvement in :class:`Timedelta` constructor (:issue:`30543`) - Performance improvement in :class:`Timestamp` constructor (:issue:`30543`) - Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`) -- +- The internal :meth:`Index._shallow_copy` now copies cached attributes over to the new index, + avoiding creating these again on the new index. This can speed up many operations + that depend on creating copies of existing indexes (:issue:`28584`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a5f133fb10d10..98e3b3ad258ea 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1,7 +1,7 @@ from datetime import datetime import operator from textwrap import dedent -from typing import TYPE_CHECKING, Any, FrozenSet, Hashable, Union +from typing import TYPE_CHECKING, Any, Dict, FrozenSet, Hashable, Union import warnings import numpy as np @@ -250,6 +250,7 @@ def _outer_indexer(self, left, right): _typ = "index" _data: Union[ExtensionArray, np.ndarray] + _cache: Dict[str, Any] _id = None _name: Label = None # MultiIndex.levels previously allowed setting the index name. We @@ -468,6 +469,7 @@ def _simple_new(cls, values, name: Label = None): # we actually set this value too. result._index_data = values result._name = name + result._cache = {} return result._reset_identity() @@ -498,11 +500,13 @@ def _shallow_copy(self, values=None, name: Label = no_default): name : Label, defaults to self.name """ name = self.name if name is no_default else name - + cache = self._cache.copy() if values is None else {} if values is None: values = self.values - return self._simple_new(values, name=name) + result = self._simple_new(values, name=name) + result._cache = cache + return result def _shallow_copy_with_infer(self, values, **kwargs): """ diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 6c250ccd09a51..2d1d69772c100 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -104,15 +104,11 @@ def _maybe_cast_slice_bound(self, label, side, kind): @Appender(Index._shallow_copy.__doc__) def _shallow_copy(self, values=None, name: Label = lib.no_default): - name = name if name is not lib.no_default else self.name - if values is not None and not self._can_hold_na and values.dtype.kind == "f": + name = self.name if name is lib.no_default else name # Ensure we are not returning an Int64Index with float data: return Float64Index._simple_new(values, name=name) - - if values is None: - values = self.values - return type(self)._simple_new(values, name=name) + return super()._shallow_copy(values=values, name=name) def _convert_for_op(self, value): """ From 362aef1d1aaed856dce4615cab4760a6aa677816 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 11 Mar 2020 03:10:37 +0000 Subject: [PATCH 319/388] DOC: Fix link to monthly meeting calendar (#32602) --- doc/source/development/meeting.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/development/meeting.rst b/doc/source/development/meeting.rst index 1d19408692cda..803f1b7002de0 100644 --- a/doc/source/development/meeting.rst +++ b/doc/source/development/meeting.rst @@ -25,7 +25,7 @@ This calendar shows all the developer meetings. You can subscribe to this calendar with the following links: * `iCal `__ -* `Google calendar `__ +* `Google calendar `__ Additionally, we'll sometimes have one-off meetings on specific topics. These will be published on the same calendar. 
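
Note on the ``Index._shallow_copy`` change in PATCH 318/388 above: copying
``_cache`` onto the new index means attributes already computed on the source
do not have to be recomputed on the copy, and since ``_simple_new`` seeds each
index with its own ``_cache = {}``, the copied dict stays independent of the
source. A rough sketch of the intended effect, assuming the patched behaviour
(``is_unique`` is just one example of a cached attribute):

    import pandas as pd

    idx = pd.Index([1, 2, 3])
    idx.is_unique                      # computed once, stored in idx._cache
    idx2 = idx._shallow_copy()         # cached attributes are carried over
    assert "is_unique" in idx2._cache  # no recomputation needed on the copy
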
From 817d57dbd3dd71db4393027042ea4f736021403d Mon Sep 17 00:00:00 2001 From: Christopher Whelan Date: Tue, 10 Mar 2020 20:19:10 -0700 Subject: [PATCH 320/388] TYP: Add type hint for DataFrame.T and certain array types (#32532) --- pandas/core/arrays/categorical.py | 2 +- pandas/core/arrays/sparse/array.py | 4 ++-- pandas/core/frame.py | 4 +++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 9dfe68b16ad6a..bcb3fa53e311b 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1317,7 +1317,7 @@ def __setstate__(self, state): setattr(self, k, v) @property - def T(self): + def T(self) -> "Categorical": """ Return transposed numpy array. """ diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 549606795f528..3b5e6b2d2bcd3 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1296,14 +1296,14 @@ def mean(self, axis=0, *args, **kwargs): nsparse = self.sp_index.ngaps return (sp_sum + self.fill_value * nsparse) / (ct + nsparse) - def transpose(self, *axes): + def transpose(self, *axes) -> "SparseArray": """ Returns the SparseArray. """ return self @property - def T(self): + def T(self) -> "SparseArray": """ Returns the SparseArray. """ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index da6a6e0a16393..30abcafb56ffb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2445,7 +2445,9 @@ def transpose(self, *args, copy: bool = False) -> "DataFrame": return result.__finalize__(self) - T = property(transpose) + @property + def T(self) -> "DataFrame": + return self.transpose() # ---------------------------------------------------------------------- # Indexing Methods From 7fa8ee7289c62c18cb1bb2218c047a181a1c7968 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Wed, 11 Mar 2020 04:53:51 +0100 Subject: [PATCH 321/388] CLN: Split and fixturized test_fillna in tests/base/test_ops.py (#32483) --- pandas/tests/base/test_ops.py | 90 +++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 41 deletions(-) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index c368db13b9017..5fb5072e5c9d9 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -18,6 +18,7 @@ is_object_dtype, needs_i8_conversion, ) +from pandas.core.dtypes.generic import ABCMultiIndex import pandas as pd from pandas import ( @@ -694,58 +695,65 @@ def test_drop_duplicates_series_vs_dataframe(self): dropped_series = df[column].drop_duplicates(keep=keep) tm.assert_frame_equal(dropped_frame, dropped_series.to_frame()) - def test_fillna(self): + def test_fillna(self, index_or_series_obj): # # GH 11343 # though Index.fillna and Series.fillna has separate impl, # test here to confirm these works as the same - for orig in self.objs: - - o = orig.copy() - values = o.values - - # values will not be changed - result = o.fillna(o.astype(object).values[0]) - if isinstance(o, Index): - tm.assert_index_equal(o, result) - else: - tm.assert_series_equal(o, result) - # check shallow_copied - assert o is not result - - for null_obj in [np.nan, None]: - for orig in self.objs: - o = orig.copy() - klass = type(o) + obj = index_or_series_obj + if isinstance(obj, ABCMultiIndex): + pytest.skip("MultiIndex doesn't support isna") + + # values will not be changed + fill_value = obj.values[0] if len(obj) > 0 else 0 + result = obj.fillna(fill_value) + if isinstance(obj, Index): + 
tm.assert_index_equal(obj, result) + else: + tm.assert_series_equal(obj, result) - if not allow_na_ops(o): - continue + # check shallow_copied + if isinstance(obj, Series) and len(obj) == 0: + # TODO: GH-32543 + pytest.xfail("Shallow copy for empty Series is bugged") + assert obj is not result - if needs_i8_conversion(o): + @pytest.mark.parametrize("null_obj", [np.nan, None]) + def test_fillna_null(self, null_obj, index_or_series_obj): + # # GH 11343 + # though Index.fillna and Series.fillna has separate impl, + # test here to confirm these works as the same + obj = index_or_series_obj + klass = type(obj) - values = o.astype(object).values - fill_value = values[0] - values[0:2] = pd.NaT - else: - values = o.values.copy() - fill_value = o.values[0] - values[0:2] = null_obj + if not allow_na_ops(obj): + pytest.skip(f"{klass} doesn't allow for NA operations") + elif len(obj) < 1: + pytest.skip("Test doesn't make sense on empty data") + elif isinstance(obj, ABCMultiIndex): + pytest.skip(f"MultiIndex can't hold '{null_obj}'") - expected = [fill_value] * 2 + list(values[2:]) + values = obj.values + fill_value = values[0] + expected = values.copy() + if needs_i8_conversion(obj): + values[0:2] = iNaT + expected[0:2] = fill_value + else: + values[0:2] = null_obj + expected[0:2] = fill_value - expected = klass(expected, dtype=orig.dtype) - o = klass(values) + expected = klass(expected) + obj = klass(values) - # check values has the same dtype as the original - assert o.dtype == orig.dtype + result = obj.fillna(fill_value) + if isinstance(obj, Index): + tm.assert_index_equal(result, expected) + else: + tm.assert_series_equal(result, expected) - result = o.fillna(fill_value) - if isinstance(o, Index): - tm.assert_index_equal(result, expected) - else: - tm.assert_series_equal(result, expected) - # check shallow_copied - assert o is not result + # check shallow_copied + assert obj is not result @pytest.mark.skipif(PYPY, reason="not relevant for PyPy") def test_memory_usage(self, index_or_series_obj): From f4f1b694f73bf427f8d296632bb4dbbc7ee184fd Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 11 Mar 2020 12:58:02 +0000 Subject: [PATCH 322/388] CI: Update web and docs to OVH with the right structure (#32530) --- .github/workflows/ci.yml | 70 ++++++++++------------------------------ 1 file changed, 17 insertions(+), 53 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a337ccbc98650..025b6f1813df7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -125,68 +125,32 @@ jobs: - name: Check ipython directive errors run: "! 
grep -B1 \"^<<<-------------------------------------------------------------------------$\" sphinx.log" - - name: Merge website and docs - run: | - mkdir -p pandas_web/docs - cp -r web/build/* pandas_web/ - cp -r doc/build/html/* pandas_web/docs/ - if: github.event_name == 'push' - - name: Install Rclone run: sudo apt install rclone -y if: github.event_name == 'push' - name: Set up Rclone run: | - RCLONE_CONFIG_PATH=$HOME/.config/rclone/rclone.conf - mkdir -p `dirname $RCLONE_CONFIG_PATH` - echo "[ovh_cloud_pandas_web]" > $RCLONE_CONFIG_PATH - echo "type = swift" >> $RCLONE_CONFIG_PATH - echo "env_auth = false" >> $RCLONE_CONFIG_PATH - echo "auth_version = 3" >> $RCLONE_CONFIG_PATH - echo "auth = https://auth.cloud.ovh.net/v3/" >> $RCLONE_CONFIG_PATH - echo "endpoint_type = public" >> $RCLONE_CONFIG_PATH - echo "tenant_domain = default" >> $RCLONE_CONFIG_PATH - echo "tenant = 2977553886518025" >> $RCLONE_CONFIG_PATH - echo "domain = default" >> $RCLONE_CONFIG_PATH - echo "user = w4KGs3pmDxpd" >> $RCLONE_CONFIG_PATH - echo "key = ${{ secrets.ovh_object_store_key }}" >> $RCLONE_CONFIG_PATH - echo "region = BHS" >> $RCLONE_CONFIG_PATH + CONF=$HOME/.config/rclone/rclone.conf + mkdir -p `dirname $CONF` + echo "[ovh_host]" > $CONF + echo "type = swift" >> $CONF + echo "env_auth = false" >> $CONF + echo "auth_version = 3" >> $CONF + echo "auth = https://auth.cloud.ovh.net/v3/" >> $CONF + echo "endpoint_type = public" >> $CONF + echo "tenant_domain = default" >> $CONF + echo "tenant = 2977553886518025" >> $CONF + echo "domain = default" >> $CONF + echo "user = w4KGs3pmDxpd" >> $CONF + echo "key = ${{ secrets.ovh_object_store_key }}" >> $CONF + echo "region = BHS" >> $CONF if: github.event_name == 'push' - name: Sync web with OVH - run: rclone sync pandas_web ovh_cloud_pandas_web:dev - if: github.event_name == 'push' - - - name: Create git repo to upload the built docs to GitHub pages - run: | - cd pandas_web - git init - touch .nojekyll - echo "dev.pandas.io" > CNAME - printf "User-agent: *\nDisallow: /" > robots.txt - git add --all . - git config user.email "pandas-dev@python.org" - git config user.name "pandas-bot" - git commit -m "pandas web and documentation in master" + run: rclone sync --exclude pandas-docs/** web/build ovh_host:prod if: github.event_name == 'push' - # For this task to work, next steps are required: - # 1. Generate a pair of private/public keys (i.e. `ssh-keygen -t rsa -b 4096 -C "your_email@example.com"`) - # 2. Go to https://github.com/pandas-dev/pandas/settings/secrets - # 3. Click on "Add a new secret" - # 4. Name: "github_pagas_ssh_key", Value: - # 5. 
The public key needs to be upladed to https://github.com/pandas-dev/pandas-dev.github.io/settings/keys - - name: Install GitHub pages ssh deployment key - uses: shimataro/ssh-key-action@v2 - with: - key: ${{ secrets.github_pages_ssh_key }} - known_hosts: 'github.com,192.30.252.128 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ==' - if: github.event_name == 'push' - - - name: Publish web and docs to GitHub pages - run: | - cd pandas_web - git remote add origin git@github.com:pandas-dev/pandas-dev.github.io.git - git push -f origin master || true + - name: Sync dev docs with OVH + run: rclone sync doc/build/html ovh_host:prod/pandas-docs/dev if: github.event_name == 'push' From c1fd95bece36815d7b3fa09f1e103a5218c5f5e9 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 11 Mar 2020 14:45:29 +0100 Subject: [PATCH 323/388] DOC: fix formatting / links of API refs in 1.0.2 whatsnew (#32620) --- doc/source/whatsnew/v1.0.2.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index d99ce468d402f..10191542b6d41 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -17,16 +17,17 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.to_excel` when ``columns`` kwarg is passed (:issue:`31677`) - Fixed regression in :meth:`Series.align` when ``other`` is a DataFrame and ``method`` is not None (:issue:`31785`) -- Fixed regression in :meth:`pandas.core.groupby.RollingGroupby.apply` where the ``raw`` parameter was ignored (:issue:`31754`) -- Fixed regression in :meth:`pandas.core.window.Rolling.corr` when using a time offset (:issue:`31789`) -- Fixed regression in :meth:`pandas.core.groupby.DataFrameGroupBy.nunique` which was modifying the original values if ``NaN`` values were present (:issue:`31950`) +- Fixed regression in ``groupby(..).rolling(..).apply()`` (``RollingGroupby``) where the ``raw`` parameter was ignored (:issue:`31754`) +- Fixed regression in :meth:`rolling(..).corr() ` when using a time offset (:issue:`31789`) +- Fixed regression in :meth:`groupby(..).nunique() ` which was modifying the original values if ``NaN`` values were present (:issue:`31950`) - Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`). 
- Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) -- Fixed regression in :meth:`pandas.core.groupby.GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`) -- Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`) -- Fixed bug in the repr of an object-dtype :class:`Index` with bools and missing values (:issue:`32146`) +- Fixed regression in :meth:`groupby(..).agg() ` calling a user-provided function an extra time on an empty input (:issue:`31760`) +- Fixed regression in joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` to preserve ``freq`` in simple cases (:issue:`32166`) +- Fixed regression in the repr of an object-dtype :class:`Index` with bools and missing values (:issue:`32146`) - Fixed regression in :meth:`read_csv` in which the ``encoding`` option was not recognized with certain file-like objects (:issue:`31819`) -- +- Fixed regression in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with (tz-aware) index and ``method=nearest`` (:issue:`26683`) + .. --------------------------------------------------------------------------- @@ -64,7 +65,6 @@ Bug fixes **Datetimelike** -- Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with a tz-aware index (:issue:`26683`) - Bug in :meth:`Series.astype` not copying for tz-naive and tz-aware datetime64 dtype (:issue:`32490`) - Bug where :func:`to_datetime` would raise when passed ``pd.NA`` (:issue:`32213`) - Improved error message when subtracting two :class:`Timestamp` that result in an out-of-bounds :class:`Timedelta` (:issue:`31774`) @@ -83,11 +83,11 @@ Bug fixes **Experimental dtypes** -- Fix bug in :meth:`DataFrame.convert_dtypes` for columns that were already using the ``"string"`` dtype (:issue:`31731`). +- Fixed bug in :meth:`DataFrame.convert_dtypes` for columns that were already using the ``"string"`` dtype (:issue:`31731`). 
+- Fixed bug in :meth:`DataFrame.convert_dtypes` for series with mix of integers and strings (:issue:`32117`)
+- Fixed bug in :meth:`DataFrame.convert_dtypes` where ``BooleanDtype`` columns were converted to ``Int64`` (:issue:`32287`)
 - Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`)
 - Fixed bug where :meth:`pandas.core.groupby.GroupBy.first` and :meth:`pandas.core.groupby.GroupBy.last` would raise a ``TypeError`` when groups contained ``pd.NA`` in a column of object dtype (:issue:`32123`)
-- Fix bug in :meth:`Series.convert_dtypes` for series with mix of integers and strings (:issue:`32117`)
-- Fixed bug in :meth:`DataFrame.convert_dtypes`, where ``BooleanDtype`` columns were converted to ``Int64`` (:issue:`32287`)
 
 **Strings**
 
From 983fae65fbadb83a064dfec53031d875d8e13928 Mon Sep 17 00:00:00 2001
From: Kaiqi Dong
Date: Wed, 11 Mar 2020 16:02:31 +0100
Subject: [PATCH 324/388] BUG: non-iterable value in meta raise error in json_normalize (#31524)

---
 doc/source/whatsnew/v1.0.2.rst         |  1 +
 pandas/io/json/_normalize.py           | 20 +++++++++++++++++---
 pandas/tests/io/json/test_normalize.py | 10 ++++++++++
 3 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst
index 10191542b6d41..123dfa07f4331 100644
--- a/doc/source/whatsnew/v1.0.2.rst
+++ b/doc/source/whatsnew/v1.0.2.rst
@@ -78,6 +78,7 @@ Bug fixes
 **I/O**
 
 - Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`)
+- Bug in :meth:`pandas.json_normalize` when value in meta path is not iterable (:issue:`31507`)
 - Fixed pickling of ``pandas.NA``. Previously a new object was returned, which broke computations relying on ``NA`` being a singleton (:issue:`31847`)
 - Fixed bug in parquet roundtrip with nullable unsigned integer dtypes (:issue:`31896`).
 
diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py
index 4b153d3cb69bf..6e68c1cf5e27e 100644
--- a/pandas/io/json/_normalize.py
+++ b/pandas/io/json/_normalize.py
@@ -8,6 +8,7 @@
 import numpy as np
 
 from pandas._libs.writers import convert_json_to_lines
+from pandas._typing import Scalar
 from pandas.util._decorators import deprecate
 
 import pandas as pd
@@ -226,14 +227,28 @@ def _json_normalize(
     Returns normalized data with columns prefixed with the given string.
     """

    def _pull_field(
        js: Dict[str, Any], spec: Union[List, str]
    ) -> Union[Scalar, Iterable]:
        """Internal function to pull field"""
        result = js  # type: ignore
        if isinstance(spec, list):
            for field in spec:
                result = result[field]
        else:
            result = result[spec]
        return result

    def _pull_records(js: Dict[str, Any], spec: Union[List, str]) -> Iterable:
        """
        Internal function to pull the field for records; similar to
        _pull_field, but the result is required to be Iterable. Raises an
        error for a non-null, non-iterable value.
        """
        result = _pull_field(js, spec)
 
        # GH 31507, GH 30145: if result is not Iterable, raise TypeError if it
        # is not null, otherwise return an empty list
         if not isinstance(result, Iterable):
             if pd.isnull(result):
                 result = []  # type: ignore
             else:
                 raise TypeError(
                     f"{js} has non iterable value {result} for path {spec}. "
                     "Must be iterable or null."
) - return result if isinstance(data, list) and not data: @@ -292,7 +306,7 @@ def _recursive_extract(data, path, seen_meta, level=0): _recursive_extract(obj[path[0]], path[1:], seen_meta, level=level + 1) else: for obj in data: - recs = _pull_field(obj, path[0]) + recs = _pull_records(obj, path[0]) recs = [ nested_to_record(r, sep=sep, max_level=max_level) if isinstance(r, dict) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 91b204ed41ebc..b7a9918ff46da 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -486,6 +486,16 @@ def test_non_interable_record_path_errors(self): with pytest.raises(TypeError, match=msg): json_normalize([test_input], record_path=[test_path]) + def test_meta_non_iterable(self): + # GH 31507 + data = """[{"id": 99, "data": [{"one": 1, "two": 2}]}]""" + + result = json_normalize(json.loads(data), record_path=["data"], meta=["id"]) + expected = DataFrame( + {"one": [1], "two": [2], "id": np.array([99], dtype=object)} + ) + tm.assert_frame_equal(result, expected) + class TestNestedToRecord: def test_flat_stays_flat(self): From 8ac101d7c3d99871bc07052697b7f50863df68c6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 11 Mar 2020 09:40:49 -0700 Subject: [PATCH 325/388] TST: revert parts of #32571 (#32630) --- pandas/tests/frame/test_analytics.py | 31 +++++++++++++++++++++++----- pandas/tests/io/test_sql.py | 6 ++++-- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 82f8a102f9c64..3964e790c7c12 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -32,6 +32,7 @@ def assert_stat_op_calc( has_skipna=True, check_dtype=True, check_dates=False, + check_less_precise=False, skipna_alternative=None, ): """ @@ -53,6 +54,9 @@ def assert_stat_op_calc( "alternative(frame)" should be checked. 
check_dates : bool, default false Whether opname should be tested on a Datetime Series + check_less_precise : bool, default False + Whether results should only be compared approximately; + passed on to tm.assert_series_equal skipna_alternative : function, default None NaN-safe version of alternative """ @@ -80,11 +84,17 @@ def wrapper(x): result0 = f(axis=0, skipna=False) result1 = f(axis=1, skipna=False) tm.assert_series_equal( - result0, frame.apply(wrapper), check_dtype=check_dtype, + result0, + frame.apply(wrapper), + check_dtype=check_dtype, + check_less_precise=check_less_precise, ) # HACK: win32 tm.assert_series_equal( - result1, frame.apply(wrapper, axis=1), check_dtype=False, + result1, + frame.apply(wrapper, axis=1), + check_dtype=False, + check_less_precise=check_less_precise, ) else: skipna_wrapper = alternative @@ -92,12 +102,17 @@ def wrapper(x): result0 = f(axis=0) result1 = f(axis=1) tm.assert_series_equal( - result0, frame.apply(skipna_wrapper), check_dtype=check_dtype, + result0, + frame.apply(skipna_wrapper), + check_dtype=check_dtype, + check_less_precise=check_less_precise, ) if opname in ["sum", "prod"]: expected = frame.apply(skipna_wrapper, axis=1) - tm.assert_series_equal(result1, expected, check_dtype=False) + tm.assert_series_equal( + result1, expected, check_dtype=False, check_less_precise=check_less_precise + ) # check dtypes if check_dtype: @@ -316,9 +331,15 @@ def kurt(x): check_dates=True, ) + # GH#32571 check_less_precise is needed on apparently-random + # py37-npdev builds and OSX-PY36-min_version builds # mixed types (with upcasting happening) assert_stat_op_calc( - "sum", np.sum, mixed_float_frame.astype("float32"), check_dtype=False, + "sum", + np.sum, + mixed_float_frame.astype("float32"), + check_dtype=False, + check_less_precise=True, ) assert_stat_op_calc( diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 86502a67e1869..bf0ed4fe25346 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2372,7 +2372,7 @@ def test_write_row_by_row(self): result = sql.read_sql("select * from test", con=self.conn) result.index = frame.index - tm.assert_frame_equal(result, frame) + tm.assert_frame_equal(result, frame, check_less_precise=True) def test_execute(self): frame = tm.makeTimeDataFrame() @@ -2632,7 +2632,9 @@ def test_write_row_by_row(self): result = sql.read_sql("select * from test", con=self.conn) result.index = frame.index - tm.assert_frame_equal(result, frame) + tm.assert_frame_equal(result, frame, check_less_precise=True) + # GH#32571 result comes back rounded to 6 digits in some builds; + # no obvious pattern def test_chunksize_read_type(self): frame = tm.makeTimeDataFrame() From ecb5b57d4ecdc271aad9e2d3aa5b99f68406d9dc Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 11 Mar 2020 13:34:47 -0500 Subject: [PATCH 326/388] Ensure valid Block mutation in SeriesBinGrouper. 
(#32561) --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/_libs/reduction.pyx | 2 ++ pandas/tests/groupby/test_bin_groupby.py | 25 ++++++++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 123dfa07f4331..bc234bbd31662 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -20,6 +20,7 @@ Fixed regressions - Fixed regression in ``groupby(..).rolling(..).apply()`` (``RollingGroupby``) where the ``raw`` parameter was ignored (:issue:`31754`) - Fixed regression in :meth:`rolling(..).corr() ` when using a time offset (:issue:`31789`) - Fixed regression in :meth:`groupby(..).nunique() ` which was modifying the original values if ``NaN`` values were present (:issue:`31950`) +- Fixed regression in ``DataFrame.groupby`` raising a ``ValueError`` from an internal operation (:issue:`31802`) - Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`). - Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) - Fixed regression in :meth:`groupby(..).agg() ` calling a user-provided function an extra time on an empty input (:issue:`31760`) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index b27072aa66708..29a5a73ef08d0 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -177,6 +177,8 @@ cdef class _BaseGrouper: object.__setattr__(cached_ityp, '_index_data', islider.buf) cached_ityp._engine.clear_mapping() object.__setattr__(cached_typ._data._block, 'values', vslider.buf) + object.__setattr__(cached_typ._data._block, 'mgr_locs', + slice(len(vslider.buf))) object.__setattr__(cached_typ, '_index', cached_ityp) object.__setattr__(cached_typ, 'name', self.name) diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index ff74d374e5e3f..152086c241a52 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -5,6 +5,7 @@ from pandas.core.dtypes.common import ensure_int64 +import pandas as pd from pandas import Index, Series, isna import pandas._testing as tm @@ -51,6 +52,30 @@ def test_series_bin_grouper(): tm.assert_almost_equal(counts, exp_counts) +def assert_block_lengths(x): + assert len(x) == len(x._data.blocks[0].mgr_locs) + return 0 + + +def cumsum_max(x): + x.cumsum().max() + return 0 + + +@pytest.mark.parametrize("func", [cumsum_max, assert_block_lengths]) +def test_mgr_locs_updated(func): + # https://github.com/pandas-dev/pandas/issues/31802 + # Some operations may require creating new blocks, which requires + # valid mgr_locs + df = pd.DataFrame({"A": ["a", "a", "a"], "B": ["a", "b", "b"], "C": [1, 1, 1]}) + result = df.groupby(["A", "B"]).agg(func) + expected = pd.DataFrame( + {"C": [0, 0]}, + index=pd.MultiIndex.from_product([["a"], ["a", "b"]], names=["A", "B"]), + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( "binner,closed,expected", [ From a3642ecb34b8a65a9d899f17475612b3bff411d4 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 11 Mar 2020 21:51:39 +0000 Subject: [PATCH 327/388] TYP: Remove _ensure_type (#32633) * remove _ensure_type by TomAugspurger * mypy fixup * add test * add whatsnew --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/core/base.py | 10 ---------- pandas/core/frame.py | 20 +++++++++----------- pandas/core/generic.py | 12 ++++++------ 
From a3642ecb34b8a65a9d899f17475612b3bff411d4 Mon Sep 17 00:00:00 2001
From: Simon Hawkins
Date: Wed, 11 Mar 2020 21:51:39 +0000
Subject: [PATCH 327/388] TYP: Remove _ensure_type (#32633)

* remove _ensure_type by TomAugspurger

* mypy fixup

* add test

* add whatsnew
---
 doc/source/whatsnew/v1.0.2.rst               |  1 +
 pandas/core/base.py                          | 10 ----------
 pandas/core/frame.py                         | 20 +++++++++-----------
 pandas/core/generic.py                       | 12 ++++++------
 pandas/core/reshape/pivot.py                 |  4 +++-
 pandas/io/formats/format.py                  |  4 +---
 pandas/tests/frame/indexing/test_indexing.py | 11 +++++++++++
 7 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst
index bc234bbd31662..ee4b265ce3ed9 100644
--- a/doc/source/whatsnew/v1.0.2.rst
+++ b/doc/source/whatsnew/v1.0.2.rst
@@ -28,6 +28,7 @@ Fixed regressions
 - Fixed regression in the repr of an object-dtype :class:`Index` with bools and missing values (:issue:`32146`)
 - Fixed regression in :meth:`read_csv` in which the ``encoding`` option was not recognized with certain file-like objects (:issue:`31819`)
 - Fixed regression in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with (tz-aware) index and ``method=nearest`` (:issue:`26683`)
+- Fixed regression in :meth:`DataFrame.reindex_like` on a :class:`DataFrame` subclass raised an ``AssertionError`` (:issue:`31925`)

 .. ---------------------------------------------------------------------------

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 478b83f538b7d..40ff0640a5bc4 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -9,7 +9,6 @@
 import numpy as np

 import pandas._libs.lib as lib
-from pandas._typing import T
 from pandas.compat import PYPY
 from pandas.compat.numpy import function as nv
 from pandas.errors import AbstractMethodError
@@ -87,15 +86,6 @@ def __sizeof__(self):
             # no memory_usage attribute, so fall back to object's 'sizeof'
             return super().__sizeof__()

-    def _ensure_type(self: T, obj) -> T:
-        """
-        Ensure that an object has same type as self.
-
-        Used by type checkers.
-        """
-        assert isinstance(obj, type(self)), type(obj)
-        return obj
-

 class NoNewAttributesMixin:
     """
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 30abcafb56ffb..d391f1be2c49f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3637,7 +3637,7 @@ def reindex(self, *args, **kwargs) -> "DataFrame":
         # Pop these, since the values are in `kwargs` under different names
         kwargs.pop("axis", None)
         kwargs.pop("labels", None)
-        return self._ensure_type(super().reindex(**kwargs))
+        return super().reindex(**kwargs)

     def drop(
         self,
@@ -3955,8 +3955,8 @@ def replace(

     @Appender(_shared_docs["shift"] % _shared_doc_kwargs)
     def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> "DataFrame":
-        return self._ensure_type(
-            super().shift(periods=periods, freq=freq, axis=axis, fill_value=fill_value)
+        return super().shift(
+            periods=periods, freq=freq, axis=axis, fill_value=fill_value
         )

     def set_index(
@@ -8409,14 +8409,12 @@ def isin(self, values) -> "DataFrame":
             from pandas.core.reshape.concat import concat

             values = collections.defaultdict(list, values)
-            return self._ensure_type(
-                concat(
-                    (
-                        self.iloc[:, [i]].isin(values[col])
-                        for i, col in enumerate(self.columns)
-                    ),
-                    axis=1,
-                )
+            return concat(
+                (
+                    self.iloc[:, [i]].isin(values[col])
+                    for i, col in enumerate(self.columns)
+                ),
+                axis=1,
             )
         elif isinstance(values, Series):
             if not values.index.is_unique:
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index f53135174741e..6f743d7388574 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8442,9 +8442,9 @@ def _align_frame(
             )

         if method is not None:
-            left = self._ensure_type(
-                left.fillna(method=method, axis=fill_axis, limit=limit)
-            )
+            _left = left.fillna(method=method, axis=fill_axis, limit=limit)
+            assert _left is not None  # needed for mypy
+            left = _left
             right = right.fillna(method=method, axis=fill_axis, limit=limit)

         # if DatetimeIndex have different tz, convert to UTC
@@ -9977,9 +9977,9 @@ def pct_change(
         if fill_method is None:
             data = self
         else:
-            data = self._ensure_type(
-                self.fillna(method=fill_method, axis=axis, limit=limit)
-            )
+            _data = self.fillna(method=fill_method, axis=axis, limit=limit)
+            assert _data is not None  # needed for mypy
+            data = _data

         rs = data.div(data.shift(periods=periods, freq=freq, axis=axis, **kwargs)) - 1
         if freq is not None:
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index 61aa34f724307..a8801d8ab3f6e 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -150,7 +150,9 @@ def pivot_table(
     table = table.sort_index(axis=1)

     if fill_value is not None:
-        table = table._ensure_type(table.fillna(fill_value, downcast="infer"))
+        _table = table.fillna(fill_value, downcast="infer")
+        assert _table is not None  # needed for mypy
+        table = _table

     if margins:
         if dropna:
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index c879eaeda64e0..f011293273c5b 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -283,9 +283,7 @@ def _chk_truncate(self) -> None:
                 series = series.iloc[:max_rows]
             else:
                 row_num = max_rows // 2
-                series = series._ensure_type(
-                    concat((series.iloc[:row_num], series.iloc[-row_num:]))
-                )
+                series = concat((series.iloc[:row_num], series.iloc[-row_num:]))
             self.tr_row_num = row_num
         else:
             self.tr_row_num = None
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index ade17860a99b7..7892030a6727e 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -1605,6 +1605,17 @@ def test_reindex_methods(self, method, expected_values):
             actual = df[::-1].reindex(target, method=switched_method)
             tm.assert_frame_equal(expected, actual)

+    def test_reindex_subclass(self):
+        # https://github.com/pandas-dev/pandas/issues/31925
+        class MyDataFrame(DataFrame):
+            pass
+
+        expected = DataFrame()
+        df = MyDataFrame()
+        result = df.reindex_like(expected)
+
+        tm.assert_frame_equal(result, expected)
+
     def test_reindex_methods_nearest_special(self):
         df = pd.DataFrame({"x": list(range(5))})
         target = np.array([-0.1, 0.9, 1.1, 1.5])
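The user-visible effect of removing ``_ensure_type``, mirroring the new test above: ``reindex_like`` no longer trips the internal assertion when called on a ``DataFrame`` subclass (GH 31925)::

    import pandas as pd

    class MyDataFrame(pd.DataFrame):
        pass

    # on pandas 1.0.1 this raised AssertionError from _ensure_type
    result = MyDataFrame().reindex_like(pd.DataFrame())
    print(type(result))
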
From 1f2385eaac87b833786be83b1b25353d38cab099 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Wed, 11 Mar 2020 15:15:21 -0700
Subject: [PATCH 328/388] DOC: Reorganize Getting Started documentation pages
 (#32389)

* DOC: Reorganize Getting Started documentation pages

* Move some docs to user_guide, and adjust phrasing

* Modify redirects, fix phrasing in getting_started

* modify redirects

Co-authored-by: Matt Roeschke
---
 doc/redirects.csv                             |  6 +--
 doc/source/getting_started/index.rst          | 51 +++++++++----------
 doc/source/getting_started/tutorials.rst      | 22 ++------
 .../{getting_started => user_guide}/10min.rst |  0
 .../basics.rst                                |  0
 .../dsintro.rst                               |  0
 doc/source/user_guide/index.rst               |  5 ++
 7 files changed, 38 insertions(+), 46 deletions(-)
 rename doc/source/{getting_started => user_guide}/10min.rst (100%)
 rename doc/source/{getting_started => user_guide}/basics.rst (100%)
 rename doc/source/{getting_started => user_guide}/dsintro.rst (100%)

diff --git a/doc/redirects.csv b/doc/redirects.csv
index 3669ff4b7cc0b..b59ccf649ee21 100644
--- a/doc/redirects.csv
+++ b/doc/redirects.csv
@@ -7,13 +7,10 @@ release,whatsnew/index

 # getting started
 install,getting_started/install
-10min,getting_started/10min
-basics,getting_started/basics
 comparison_with_r,getting_started/comparison/comparison_with_r
 comparison_with_sql,getting_started/comparison/comparison_with_sql
 comparison_with_sas,getting_started/comparison/comparison_with_sas
 comparison_with_stata,getting_started/comparison/comparison_with_stata
-dsintro,getting_started/dsintro
 overview,getting_started/overview
 tutorials,getting_started/tutorials
@@ -38,6 +35,9 @@ text,user_guide/text
 timedeltas,user_guide/timedeltas
 timeseries,user_guide/timeseries
 visualization,user_guide/visualization
+10min,user_guide/10min
+basics,user_guide/basics
+dsintro,user_guide/dsintro

 # development
 contributing,development/contributing
diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst
index a2f8f79f22ae4..9ac8c58e1d8f2 100644
--- a/doc/source/getting_started/index.rst
+++ b/doc/source/getting_started/index.rst
@@ -9,8 +9,6 @@ Getting started
 Installation
 ------------

-Before you can use pandas, you’ll need to get it installed.
-
 .. raw:: html
@@ -23,7 +21,7 @@ Before you can use pandas, you’ll need to get it installed.

-Pandas is part of the `Anaconda `__ distribution and can be
+pandas is part of the `Anaconda `__ distribution and can be
 installed with Anaconda or Miniconda:

 .. raw:: html
@@ -49,7 +47,7 @@

-Pandas can be installed via pip from `PyPI `__.
+pandas can be installed via pip from `PyPI `__.

 .. raw:: html
@@ -103,7 +101,7 @@ Intro to pandas

-                        What kind of data does Pandas handle?
+                        What kind of data does pandas handle?
@@ -117,8 +115,8 @@ Intro to pandas

-When working with tabular data, such as data stored in spreadsheets or databases, Pandas is the right tool for you. Pandas will help you
-to explore, clean and process your data. In Pandas, a data table is called a :class:`DataFrame`.
+When working with tabular data, such as data stored in spreadsheets or databases, pandas is the right tool for you. pandas will help you
+to explore, clean and process your data. In pandas, a data table is called a :class:`DataFrame`.

 .. image:: ../_static/schemas/01_table_dataframe.svg
    :align: center
@@ -164,7 +162,7 @@

-Pandas supports the integration with many file formats or data sources out of the box (csv, excel, sql, json, parquet,…). Importing data from each of these
+pandas supports the integration with many file formats or data sources out of the box (csv, excel, sql, json, parquet,…). Importing data from each of these
 data sources is provided by function with the prefix ``read_*``. Similarly, the ``to_*`` methods are used to store data.

 .. image:: ../_static/schemas/02_io_readwrite.svg
    :align: center
@@ -212,7 +210,7 @@

 Selecting or filtering specific rows and/or columns? Filtering the data on a condition? Methods for slicing, selecting, and extracting the
-data you need are available in Pandas.
+data you need are available in pandas.

 .. image:: ../_static/schemas/03_subset_columns_rows.svg
    :align: center
@@ -258,7 +256,7 @@

-Pandas provides plotting your data out of the box, using the power of Matplotlib. You can pick the plot type (scatter, bar, boxplot,...)
+pandas provides plotting your data out of the box, using the power of Matplotlib. You can pick the plot type (scatter, bar, boxplot,...)
 corresponding to your data.

 .. image:: ../_static/schemas/04_plot_overview.svg
    :align: center
@@ -492,7 +490,7 @@ Multiple tables can be concatenated both column wise as row wise and database-li

-Pandas has great support for time series and has an extensive set of tools for working with dates, times, and time-indexed data.
+pandas has great support for time series and has an extensive set of tools for working with dates, times, and time-indexed data.

 .. raw:: html
@@ -535,7 +533,7 @@

-Data sets do not only contain numerical data. Pandas provides a wide range of functions to cleaning textual data and extract useful information from it.
+Data sets do not only contain numerical data. pandas provides a wide range of functions to clean textual data and extract useful information from it.

 .. raw:: html
@@ -568,9 +566,8 @@ Coming from...
 --------------

-Currently working with other software for data manipulation in a tabular format? You're probably familiar to typical
-data operations and know *what* to do with your tabular data, but lacking the syntax to execute these operations. Get to know
-the pandas syntax by looking for equivalents from the software you already know:
+Are you familiar with other software for manipulating tabular data? Learn
+the pandas-equivalent operations compared to software you already know:

 .. raw:: html
@@ -580,7 +577,7 @@ the pandas syntax by looking for equivalents from the software you already know:
                     R project logo
-                    The R programming language provides the data.frame data structure and multiple packages,
+                    The R programming language provides the dataframe data structure and multiple packages,
                     such as tidyverse use and extend data.frames for convenient data handling functionalities similar to pandas.
@@ -597,7 +594,7 @@
                     SQL logo
-                    Already familiar to SELECT, GROUP BY, JOIN,...?
+                    Already familiar with SELECT, GROUP BY, JOIN, etc.?
                     Most of these SQL manipulations do have equivalents in pandas.

 .. container:: custom-button
@@ -615,7 +612,7 @@
                     The data set included in the STATA statistical software suite corresponds
-                    to the pandas data.frame. Many of the operations known from STATA have an equivalent
+                    to the pandas dataframe. Many of the operations known from STATA have an equivalent
                     in pandas.

 .. container:: custom-button
@@ -632,8 +629,8 @@
                     SAS logo
                     The SAS statistical software suite
-                    also provides the data set corresponding to the pandas data.frame.
-                    Also vectorized operations, filtering, string processing operations,... from SAS have similar
+                    also provides the data set corresponding to the pandas dataframe.
+                    Also SAS vectorized operations, filtering, string processing operations, and more have similar
                     functions in pandas.

 .. container:: custom-button
@@ -648,11 +645,16 @@

-Community tutorials
--------------------
+Tutorials
+---------
+
+For a quick overview of pandas functionality, see :ref:`10 Minutes to pandas<10min>`.
+
+You can also reference the pandas `cheat sheet `_
+for a succinct guide for manipulating data with pandas.

 The community produces a wide variety of tutorials available online. Some of the
-material is enlisted in the community contributed :ref:`tutorials`.
+material is enlisted in the community contributed :ref:`communitytutorials`.

 .. If you update this toctree, also update the manual toctree in the
@@ -664,9 +666,6 @@ material is enlisted in the community contributed :ref:`tutorials`.

     install
     overview
-    10min
     intro_tutorials/index
-    basics
-    dsintro
     comparison/index
     tutorials
diff --git a/doc/source/getting_started/tutorials.rst b/doc/source/getting_started/tutorials.rst
index 434d791474807..fce4aa4beba60 100644
--- a/doc/source/getting_started/tutorials.rst
+++ b/doc/source/getting_started/tutorials.rst
@@ -1,24 +1,12 @@
-.. _tutorials:
+.. _communitytutorials:

 {{ header }}

-*********
-Tutorials
-*********
+*******************
+Community Tutorials
+*******************

-This is a guide to many pandas tutorials, geared mainly for new users.
-
-Internal guides
-===============
-
-pandas' own :ref:`10 Minutes to pandas<10min>`.
-
-More complex recipes are in the :ref:`Cookbook`.
-
-A handy pandas `cheat sheet `_.
-
-Community guides
-================
+This is a guide to many pandas tutorials by the community, geared mainly for new users.

 pandas Cookbook by Julia Evans
 ------------------------------
diff --git a/doc/source/getting_started/10min.rst b/doc/source/user_guide/10min.rst
similarity index 100%
rename from doc/source/getting_started/10min.rst
rename to doc/source/user_guide/10min.rst
diff --git a/doc/source/getting_started/basics.rst b/doc/source/user_guide/basics.rst
similarity index 100%
rename from doc/source/getting_started/basics.rst
rename to doc/source/user_guide/basics.rst
diff --git a/doc/source/getting_started/dsintro.rst b/doc/source/user_guide/dsintro.rst
similarity index 100%
rename from doc/source/getting_started/dsintro.rst
rename to doc/source/user_guide/dsintro.rst
diff --git a/doc/source/user_guide/index.rst b/doc/source/user_guide/index.rst
index 30b1c0b4eac0d..8226e72779588 100644
--- a/doc/source/user_guide/index.rst
+++ b/doc/source/user_guide/index.rst
@@ -12,6 +12,8 @@ pandas approaches the problem, with many examples throughout.

 Users brand-new to pandas should start with :ref:`10min`.

+For a high level summary of the pandas fundamentals, see :ref:`dsintro` and :ref:`basics`.
+
 Further information on any specific method can be obtained in the
 :ref:`api`.
@@ -21,6 +23,9 @@

 .. toctree::
    :maxdepth: 2

+   10min
+   dsintro
+   basics
    io
    indexing
    advanced
From 5e27d0a1be506a1d1acf9d9a8864075c996bb9bd Mon Sep 17 00:00:00 2001
From: Stéphan Taljaard
Date: Thu, 12 Mar 2020 01:25:47 +0200
Subject: [PATCH 329/388] Clarify pivot_table fill_value description (#32618)

---
 pandas/core/frame.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index d391f1be2c49f..5e8db3acc7ff3 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5903,7 +5903,8 @@ def pivot(self, index=None, columns=None, values=None) -> "DataFrame":
         If dict is passed, the key is column to aggregate and value
         is function or list of functions.
     fill_value : scalar, default None
-        Value to replace missing values with.
+        Value to replace missing values with (in the resulting pivot table,
+        after aggregation).
     margins : bool, default False
         Add all row / columns (e.g. for subtotal / grand totals).
     dropna : bool, default True
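A small example of the behaviour the reworded docstring describes: ``fill_value`` fills missing cells in the *result*, after aggregation, rather than NaNs in the input (a sketch with made-up data)::

    import pandas as pd

    df = pd.DataFrame(
        {"row": ["a", "a", "b"], "col": ["x", "y", "x"], "val": [1, 2, 3]}
    )

    # the ("b", "y") cell has no input rows, so the aggregated value is
    # missing and gets filled with 0; input NaNs would not be touched
    table = pd.pivot_table(
        df, index="row", columns="col", values="val", aggfunc="sum", fill_value=0
    )
    print(table)
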
From 1b76440e1c9b0e731e0f7eeacf3c73da3c9d715e Mon Sep 17 00:00:00 2001
From: Robert de Vries
Date: Thu, 12 Mar 2020 01:54:06 +0100
Subject: [PATCH 330/388] BUG: Add extra check for failing UTF-8 conversion
 (#32548)

---
 doc/source/whatsnew/v1.1.0.rst                 |   1 +
 pandas/_libs/src/parse_helper.h                |   3 +++
 pandas/tests/io/data/excel/high_surrogate.xlsx | Bin 0 -> 10334 bytes
 pandas/tests/io/excel/test_readers.py          |   8 ++++++++
 4 files changed, 12 insertions(+)
 create mode 100644 pandas/tests/io/data/excel/high_surrogate.xlsx

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index c473500c205d8..035d8da74d1c1 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -305,6 +305,7 @@ I/O
   timestamps with ``version="2.0"`` (:issue:`31652`).
 - Bug in :meth:`read_csv` was raising `TypeError` when `sep=None` was used in combination with `comment` keyword (:issue:`31396`)
 - Bug in :class:`HDFStore` that caused it to set to ``int64`` the dtype of a ``datetime64`` column when reading a DataFrame in Python 3 from fixed format written in Python 2 (:issue:`31750`)
+- Bug in :meth:`read_excel` where a UTF-8 string with a high surrogate would cause a segmentation violation (:issue:`23809`)

 Plotting
diff --git a/pandas/_libs/src/parse_helper.h b/pandas/_libs/src/parse_helper.h
index 7fbe7a04d5b22..2ada0a4bd173d 100644
--- a/pandas/_libs/src/parse_helper.h
+++ b/pandas/_libs/src/parse_helper.h
@@ -34,6 +34,9 @@ int floatify(PyObject *str, double *result, int *maybe_int) {
         data = PyBytes_AS_STRING(str);
     } else if (PyUnicode_Check(str)) {
         tmp = PyUnicode_AsUTF8String(str);
+        if (tmp == NULL) {
+            return -1;
+        }
         data = PyBytes_AS_STRING(tmp);
     } else {
         PyErr_SetString(PyExc_TypeError, "Invalid object type");
diff --git a/pandas/tests/io/data/excel/high_surrogate.xlsx b/pandas/tests/io/data/excel/high_surrogate.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..1e29b6bee6586ba9eb72fe6c706ccc71bb439322
GIT binary patch
literal 10334
[base85-encoded binary patch data omitted]
Date: Wed, 11 Mar 2020 19:26:12 -0700
Subject: [PATCH 331/388] REG: dt64 shift with integer fill_value (#32591)

---
 doc/source/whatsnew/v1.0.2.rst            |  1 +
 pandas/core/arrays/datetimelike.py        | 51 +++++++++++++++++++++++
 pandas/core/internals/blocks.py           | 13 +++---
 pandas/tests/arrays/test_datetimelike.py  | 17 ++++++++
 pandas/tests/frame/methods/test_shift.py  | 23 ++++++++++
 pandas/tests/series/methods/test_shift.py | 10 +++++
 6 files changed, 110 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst
index ee4b265ce3ed9..e6d65b1f828cb 100644
--- a/doc/source/whatsnew/v1.0.2.rst
+++ b/doc/source/whatsnew/v1.0.2.rst
@@ -29,6 +29,7 @@ Fixed regressions
 - Fixed regression in :meth:`read_csv` in which the ``encoding`` option was not recognized with certain file-like objects (:issue:`31819`)
 - Fixed regression in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with (tz-aware) index and ``method=nearest`` (:issue:`26683`)
 - Fixed regression in :meth:`DataFrame.reindex_like` on a :class:`DataFrame` subclass raised an ``AssertionError`` (:issue:`31925`)
+- Fixed regression in :meth:`Series.shift` with ``datetime64`` dtype when passing an integer ``fill_value`` (:issue:`32591`)

 .. ---------------------------------------------------------------------------

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 8c870c6255200..105d9581b1a25 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -745,6 +745,57 @@ def _from_factorized(cls, values, original):
     def _values_for_argsort(self):
         return self._data

+    @Appender(ExtensionArray.shift.__doc__)
+    def shift(self, periods=1, fill_value=None, axis=0):
+        if not self.size or periods == 0:
+            return self.copy()
+
+        if is_valid_nat_for_dtype(fill_value, self.dtype):
+            fill_value = NaT
+        elif not isinstance(fill_value, self._recognized_scalars):
+            # only warn if we're not going to raise
+            if self._scalar_type is Period and lib.is_integer(fill_value):
+                # kludge for #31971 since Period(integer) tries to cast to str
+                new_fill = Period._from_ordinal(fill_value, freq=self.freq)
+            else:
+                new_fill = self._scalar_type(fill_value)
+
+            # stacklevel here is chosen to be correct when called from
+            # DataFrame.shift or Series.shift
+            warnings.warn(
+                f"Passing {type(fill_value)} to shift is deprecated and "
+                "will raise in a future version, pass "
+                f"{self._scalar_type.__name__} instead.",
+                FutureWarning,
+                stacklevel=7,
+            )
+            fill_value = new_fill
+
+        fill_value = self._unbox_scalar(fill_value)
+
+        new_values = self._data
+
+        # make sure array sent to np.roll is c_contiguous
+        f_ordered = new_values.flags.f_contiguous
+        if f_ordered:
+            new_values = new_values.T
+            axis = new_values.ndim - axis - 1
+
+        new_values = np.roll(new_values, periods, axis=axis)
+
+        axis_indexer = [slice(None)] * self.ndim
+        if periods > 0:
+            axis_indexer[axis] = slice(None, periods)
+        else:
+            axis_indexer[axis] = slice(periods, None)
+        new_values[tuple(axis_indexer)] = fill_value
+
+        # restore original order
+        if f_ordered:
+            new_values = new_values.T
+
+        return type(self)._simple_new(new_values, dtype=self.dtype)
+
     # ------------------------------------------------------------------
     # Additional array methods
     #  These are not part of the EA API, but we implement them because
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 024d3b205df77..3f5c27bf5269c 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -1917,10 +1917,7 @@ def diff(self, n: int, axis: int = 1) -> List["Block"]:
         return super().diff(n, axis)

     def shift(
-        self,
-        periods: int,
-        axis: libinternals.BlockPlacement = 0,
-        fill_value: Any = None,
+        self, periods: int, axis: int = 0, fill_value: Any = None,
     ) -> List["ExtensionBlock"]:
         """
         Shift the block by `periods`.
@@ -2173,7 +2170,7 @@ def internal_values(self):

     def iget(self, key):
         # GH#31649 we need to wrap scalars in Timestamp/Timedelta
-        # TODO: this can be removed if we ever have 2D EA
+        # TODO(EA2D): this can be removed if we ever have 2D EA
         result = super().iget(key)
         if isinstance(result, np.datetime64):
             result = Timestamp(result)
@@ -2181,6 +2178,12 @@ def iget(self, key):
             result = Timedelta(result)
         return result

+    def shift(self, periods, axis=0, fill_value=None):
+        # TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs
+        values = self.array_values()
+        new_values = values.shift(periods, fill_value=fill_value, axis=axis)
+        return self.make_block_same_class(new_values)
+

 class DatetimeBlock(DatetimeLikeBlockMixin, Block):
     __slots__ = ()
diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py
index f99ee542d543c..b8a70752330c5 100644
--- a/pandas/tests/arrays/test_datetimelike.py
+++ b/pandas/tests/arrays/test_datetimelike.py
@@ -240,6 +240,23 @@ def test_inplace_arithmetic(self):
         arr -= pd.Timedelta(days=1)
         tm.assert_equal(arr, expected)

+    def test_shift_fill_int_deprecated(self):
+        # GH#31971
+        data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
+        arr = self.array_cls(data, freq="D")
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            result = arr.shift(1, fill_value=1)
+
+        expected = arr.copy()
+        if self.array_cls is PeriodArray:
+            fill_val = PeriodArray._scalar_type._from_ordinal(1, freq=arr.freq)
+        else:
+            fill_val = arr._scalar_type(1)
+        expected[0] = fill_val
+        expected[1:] = arr[:-1]
+        tm.assert_equal(result, expected)
+

 class TestDatetimeArray(SharedTests):
     index_cls = pd.DatetimeIndex
diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py
index cfb17de892b1c..f6c89172bbf86 100644
--- a/pandas/tests/frame/methods/test_shift.py
+++ b/pandas/tests/frame/methods/test_shift.py
@@ -185,3 +185,26 @@ def test_tshift(self, datetime_frame):
         msg = "Freq was not given and was not set in the index"
         with pytest.raises(ValueError, match=msg):
             no_freq.tshift()
+
+    def test_shift_dt64values_int_fill_deprecated(self):
+        # GH#31971
+        ser = pd.Series([pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")])
+        df = ser.to_frame()
+
+        with tm.assert_produces_warning(FutureWarning):
+            result = df.shift(1, fill_value=0)
+
+        expected = pd.Series([pd.Timestamp(0), ser[0]]).to_frame()
+        tm.assert_frame_equal(result, expected)
+
+        # axis = 1
+        df2 = pd.DataFrame({"A": ser, "B": ser})
+        df2._consolidate_inplace()
+
+        with tm.assert_produces_warning(FutureWarning):
+            result = df2.shift(1, axis=1, fill_value=0)
+
+        expected = pd.DataFrame(
+            {"A": [pd.Timestamp(0), pd.Timestamp(0)], "B": df2["A"]}
+        )
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_shift.py b/pandas/tests/series/methods/test_shift.py
index 8256e2f33b936..e8d7f5958d0a1 100644
--- a/pandas/tests/series/methods/test_shift.py
+++ b/pandas/tests/series/methods/test_shift.py
@@ -263,3 +263,13 @@ def test_shift_categorical(self):

         tm.assert_index_equal(s.values.categories, sp1.values.categories)
         tm.assert_index_equal(s.values.categories, sn2.values.categories)
+
+    def test_shift_dt64values_int_fill_deprecated(self):
+        # GH#31971
+        ser = pd.Series([pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")])
+
+        with tm.assert_produces_warning(FutureWarning):
+            result = ser.shift(1, fill_value=0)
+
+        expected = pd.Series([pd.Timestamp(0), ser[0]])
+        tm.assert_series_equal(result, expected)
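Following the tests above, the regression and the new deprecation in one sketch::

    import pandas as pd

    ser = pd.Series([pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")])

    # pandas 1.0.1 raised here; this works again but emits a FutureWarning
    # that an integer fill_value will eventually require an explicit Timestamp
    result = ser.shift(1, fill_value=0)
    print(result)  # first element is Timestamp(0), i.e. the epoch
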
From 9e7cb7c102655d0ba92d2561c178da9254d5cef5 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com>
Date: Wed, 11 Mar 2020 21:28:59 -0500
Subject: [PATCH 332/388] BUG: Fix DataFrameGroupBy.mean error for Int64 dtype
 (#32223)

---
 doc/source/whatsnew/v1.0.2.rst        |  1 +
 pandas/core/groupby/generic.py        |  2 +-
 pandas/tests/groupby/test_function.py | 31 +++++++++++++++++++++++++++
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst
index e6d65b1f828cb..00b17e23677c1 100644
--- a/doc/source/whatsnew/v1.0.2.rst
+++ b/doc/source/whatsnew/v1.0.2.rst
@@ -92,6 +92,7 @@ Bug fixes
 - Fixed bug in :meth:`DataFrame.convert_dtypes` where ``BooleanDtype`` columns were converted to ``Int64`` (:issue:`32287`)
 - Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`)
 - Fixed bug where :meth:`pandas.core.groupby.GroupBy.first` and :meth:`pandas.core.groupby.GroupBy.last` would raise a ``TypeError`` when groups contained ``pd.NA`` in a column of object dtype (:issue:`32123`)
+- Fixed bug where :meth:`DataFrameGroupBy.mean`, :meth:`DataFrameGroupBy.median`, :meth:`DataFrameGroupBy.var`, and :meth:`DataFrameGroupBy.std` would raise a ``TypeError`` on ``Int64`` dtype columns (:issue:`32219`)

 **Strings**

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index b7ac3048631c5..a9a608e6f76ba 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1083,7 +1083,7 @@ def _cython_agg_blocks(
                     result = type(block.values)._from_sequence(
                         result.ravel(), dtype=block.values.dtype
                     )
-                except ValueError:
+                except (ValueError, TypeError):
                     # reshape to be valid for non-Extension Block
                     result = result.reshape(1, -1)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 03278e69fe94a..9c33843cdcecc 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -1605,3 +1605,34 @@ def test_groupby_mean_no_overflow():
         }
     )
     assert df.groupby("user")["connections"].mean()["A"] == 3689348814740003840
+
+
+@pytest.mark.parametrize(
+    "values",
+    [
+        {
+            "a": [1, 1, 1, 2, 2, 2, 3, 3, 3],
+            "b": [1, pd.NA, 2, 1, pd.NA, 2, 1, pd.NA, 2],
+        },
+        {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]},
+    ],
+)
+@pytest.mark.parametrize("function", ["mean", "median", "var"])
+def test_apply_to_nullable_integer_returns_float(values, function):
+    # https://github.com/pandas-dev/pandas/issues/32219
+    output = 0.5 if function == "var" else 1.5
+    arr = np.array([output] * 3, dtype=float)
+    idx = pd.Index([1, 2, 3], dtype=object, name="a")
+    expected = pd.DataFrame({"b": arr}, index=idx)
+
+    groups = pd.DataFrame(values, dtype="Int64").groupby("a")
+
+    result = getattr(groups, function)()
+    tm.assert_frame_equal(result, expected)
+
+    result = groups.agg(function)
+    tm.assert_frame_equal(result, expected)
+
+    result = groups.agg([function])
+    expected.columns = MultiIndex.from_tuples([("b", function)])
+    tm.assert_frame_equal(result, expected)
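Grounded in the test above, the fixed behaviour at the user level::

    import pandas as pd

    df = pd.DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 1, 2]}, dtype="Int64")

    # previously raised TypeError on Int64 columns; now returns float
    # results (GH 32219)
    print(df.groupby("a").mean())
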
From ef7e7208d3e396ac574b8b027ca42f89ac99e449 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Thu, 12 Mar 2020 02:32:55 +0000
Subject: [PATCH 333/388] BUG: GroupBy aggregation of DataFrame with
 MultiIndex columns breaks with custom function (#32040)

---
 doc/source/whatsnew/v1.0.2.rst                   |  1 +
 pandas/core/groupby/generic.py                   |  8 +++++---
 pandas/tests/groupby/aggregate/test_aggregate.py | 13 +++++++++++++
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst
index 00b17e23677c1..80cbf841e194e 100644
--- a/doc/source/whatsnew/v1.0.2.rst
+++ b/doc/source/whatsnew/v1.0.2.rst
@@ -17,6 +17,7 @@ Fixed regressions

 - Fixed regression in :meth:`DataFrame.to_excel` when ``columns`` kwarg is passed (:issue:`31677`)
 - Fixed regression in :meth:`Series.align` when ``other`` is a DataFrame and ``method`` is not None (:issue:`31785`)
+- Fixed regression in :meth:`groupby(..).agg() ` which was failing on frames with MultiIndex columns and a custom function (:issue:`31777`)
 - Fixed regression in ``groupby(..).rolling(..).apply()`` (``RollingGroupby``) where the ``raw`` parameter was ignored (:issue:`31754`)
 - Fixed regression in :meth:`rolling(..).corr() ` when using a time offset (:issue:`31789`)
 - Fixed regression in :meth:`groupby(..).nunique() ` which was modifying the original values if ``NaN`` values were present (:issue:`31950`)
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index a9a608e6f76ba..4102b8527b6aa 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -955,9 +955,11 @@ def aggregate(self, func=None, *args, **kwargs):
                     raise
                 result = self._aggregate_frame(func)
             else:
-                result.columns = Index(
-                    result.columns.levels[0], name=self._selected_obj.columns.name
-                )
+                # select everything except for the last level, which is the one
+                # containing the name of the function(s), see GH 32040
+                result.columns = result.columns.rename(
+                    [self._selected_obj.columns.name] * result.columns.nlevels
+                ).droplevel(-1)

         if not self.as_index:
             self._insert_inaxis_grouper_inplace(result)
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 48f8de7e51ae4..1265547653d7b 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -691,6 +691,19 @@ def test_agg_relabel_multiindex_duplicates():
     tm.assert_frame_equal(result, expected)


+@pytest.mark.parametrize(
+    "func", [lambda s: s.mean(), lambda s: np.mean(s), lambda s: np.nanmean(s)]
+)
+def test_multiindex_custom_func(func):
+    # GH 31777
+    data = [[1, 4, 2], [5, 7, 1]]
+    df = pd.DataFrame(data, columns=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 3]]))
+    result = df.groupby(np.array([0, 1])).agg(func)
+    expected_dict = {(1, 3): {0: 1, 1: 5}, (1, 4): {0: 4, 1: 7}, (2, 3): {0: 2, 1: 1}}
+    expected = pd.DataFrame(expected_dict)
+    tm.assert_frame_equal(result, expected)
+
+
 def myfunc(s):
     return np.percentile(s, q=0.90)
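Based on the new test, a quick illustration of the fix::

    import numpy as np
    import pandas as pd

    cols = pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 3]])
    df = pd.DataFrame([[1, 4, 2], [5, 7, 1]], columns=cols)

    # on pandas 1.0.1 a custom (non-cython) function here broke the
    # MultiIndex columns; now both levels are preserved (GH 31777)
    result = df.groupby(np.array([0, 1])).agg(lambda s: s.mean())
    print(result.columns.nlevels)  # 2
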
From c85f5f2d09a1ef28c09ae03a6de4b7cf6f21072d Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Wed, 11 Mar 2020 19:38:18 -0700
Subject: [PATCH 334/388] CLN: trim unnecessary checks (#32643)

---
 pandas/core/indexes/datetimelike.py |  3 +--
 pandas/core/internals/blocks.py     | 10 ++--------
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index 894e1d95a17bc..7889829516f7f 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -22,7 +22,6 @@
     is_list_like,
     is_period_dtype,
     is_scalar,
-    needs_i8_conversion,
 )
 from pandas.core.dtypes.concat import concat_compat
 from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
@@ -484,7 +483,7 @@ def where(self, cond, other=None):

         if is_categorical_dtype(other):
             # e.g. we have a Categorical holding self.dtype
-            if needs_i8_conversion(other.categories):
+            if is_dtype_equal(other.categories.dtype, self.dtype):
                 other = other._internal_get_values()

         if not is_dtype_equal(self.dtype, other.dtype):
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 3f5c27bf5269c..6d4ee6222933c 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -2287,11 +2287,7 @@ def to_native_types(
         return np.atleast_2d(result)

     def should_store(self, value) -> bool:
-        return (
-            issubclass(value.dtype.type, np.datetime64)
-            and not is_datetime64tz_dtype(value)
-            and not is_extension_array_dtype(value)
-        )
+        return is_datetime64_dtype(value.dtype)

     def set(self, locs, values):
         """
@@ -2539,9 +2535,7 @@ def fillna(self, value, **kwargs):
         return super().fillna(value, **kwargs)

     def should_store(self, value) -> bool:
-        return issubclass(
-            value.dtype.type, np.timedelta64
-        ) and not is_extension_array_dtype(value)
+        return is_timedelta64_dtype(value.dtype)

     def to_native_types(self, slicer=None, na_rep=None, quoting=None, **kwargs):
         """ convert to our native types format, slicing if desired """
From 27ad779713f1d54b83547db5465bec4245d279cd Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Wed, 11 Mar 2020 19:42:46 -0700
Subject: [PATCH 335/388] TST: tighten check_categorical=False tests (#32636)

---
 pandas/_testing.py                            | 10 ++--
 .../tests/arrays/categorical/test_replace.py  | 53 ++++++++++---------
 pandas/tests/generic/test_frame.py            |  6 +--
 pandas/tests/io/pytables/test_store.py        | 15 +-----
 pandas/tests/io/test_stata.py                 | 48 +++++++++++------
 pandas/tests/reshape/merge/test_merge.py      |  6 +--
 pandas/tests/series/test_dtypes.py            | 24 ++++-----
 7 files changed, 84 insertions(+), 78 deletions(-)

diff --git a/pandas/_testing.py b/pandas/_testing.py
index 136dfbd40276d..dff15c66750ac 100644
--- a/pandas/_testing.py
+++ b/pandas/_testing.py
@@ -824,10 +824,14 @@ def assert_categorical_equal(
             left.codes, right.codes, check_dtype=check_dtype, obj=f"{obj}.codes",
         )
     else:
+        try:
+            lc = left.categories.sort_values()
+            rc = right.categories.sort_values()
+        except TypeError:
+            # e.g. '<' not supported between instances of 'int' and 'str'
+            lc, rc = left.categories, right.categories
         assert_index_equal(
-            left.categories.sort_values(),
-            right.categories.sort_values(),
-            obj=f"{obj}.categories",
+            lc, rc, obj=f"{obj}.categories",
        )
         assert_index_equal(
             left.categories.take(left.codes),
diff --git a/pandas/tests/arrays/categorical/test_replace.py b/pandas/tests/arrays/categorical/test_replace.py
index 52530123bd52f..b9ac3ce9a37ae 100644
--- a/pandas/tests/arrays/categorical/test_replace.py
+++ b/pandas/tests/arrays/categorical/test_replace.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pytest

 import pandas as pd
@@ -5,44 +6,46 @@


 @pytest.mark.parametrize(
-    "to_replace,value,expected,check_types,check_categorical",
+    "to_replace,value,expected,flip_categories",
     [
         # one-to-one
-        (1, 2, [2, 2, 3], True, True),
-        (1, 4, [4, 2, 3], True, True),
-        (4, 1, [1, 2, 3], True, True),
-        (5, 6, [1, 2, 3], True, True),
+        (1, 2, [2, 2, 3], False),
+        (1, 4, [4, 2, 3], False),
+        (4, 1, [1, 2, 3], False),
+        (5, 6, [1, 2, 3], False),
         # many-to-one
-        ([1], 2, [2, 2, 3], True, True),
-        ([1, 2], 3, [3, 3, 3], True, True),
-        ([1, 2], 4, [4, 4, 3], True, True),
-        ((1, 2, 4), 5, [5, 5, 3], True, True),
-        ((5, 6), 2, [1, 2, 3], True, True),
+        ([1], 2, [2, 2, 3], False),
+        ([1, 2], 3, [3, 3, 3], False),
+        ([1, 2], 4, [4, 4, 3], False),
+        ((1, 2, 4), 5, [5, 5, 3], False),
+        ((5, 6), 2, [1, 2, 3], False),
         # many-to-many, handled outside of Categorical and results in separate dtype
-        ([1], [2], [2, 2, 3], False, False),
-        ([1, 4], [5, 2], [5, 2, 3], False, False),
+        ([1], [2], [2, 2, 3], True),
+        ([1, 4], [5, 2], [5, 2, 3], True),
         # check_categorical sorts categories, which crashes on mixed dtypes
-        (3, "4", [1, 2, "4"], True, False),
-        ([1, 2, "3"], "5", ["5", "5", 3], True, False),
+        (3, "4", [1, 2, "4"], False),
+        ([1, 2, "3"], "5", ["5", "5", 3], True),
     ],
 )
-def test_replace(to_replace, value, expected, check_types, check_categorical):
+def test_replace(to_replace, value, expected, flip_categories):
     # GH 31720
+    stays_categorical = not isinstance(value, list)
+
     s = pd.Series([1, 2, 3], dtype="category")
     result = s.replace(to_replace, value)
     expected = pd.Series(expected, dtype="category")
     s.replace(to_replace, value, inplace=True)
+
+    if flip_categories:
+        expected = expected.cat.set_categories(expected.cat.categories[::-1])
+
+    if not stays_categorical:
+        # the replace call loses categorical dtype
+        expected = pd.Series(np.asarray(expected))
+
     tm.assert_series_equal(
-        expected,
-        result,
-        check_dtype=check_types,
-        check_categorical=check_categorical,
-        check_category_order=False,
+        expected, result, check_category_order=False,
     )
     tm.assert_series_equal(
-        expected,
-        s,
-        check_dtype=check_types,
-        check_categorical=check_categorical,
-        check_category_order=False,
+        expected, s, check_category_order=False,
     )
diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py
index 8fe49b2ec2299..631f484cfc22a 100644
--- a/pandas/tests/generic/test_frame.py
+++ b/pandas/tests/generic/test_frame.py
@@ -273,17 +273,13 @@ def test_to_xarray_index_types(self, index):
         assert isinstance(result, Dataset)

         # idempotency
-        # categoricals are not preserved
         # datetimes w/tz are preserved
         # column names are lost
         expected = df.copy()
         expected["f"] = expected["f"].astype(object)
         expected.columns.name = None
         tm.assert_frame_equal(
-            result.to_dataframe(),
-            expected,
-            check_index_type=False,
-            check_categorical=False,
+            result.to_dataframe(), expected,
         )

     @td.skip_if_no("xarray", min_version="0.7.0")
diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
index 34f6a73812c97..2702d378fd153 100644
--- a/pandas/tests/io/pytables/test_store.py
+++ b/pandas/tests/io/pytables/test_store.py
@@ -13,8 +13,6 @@
 from pandas.compat import is_platform_little_endian, is_platform_windows
 import pandas.util._test_decorators as td

-from pandas.core.dtypes.common import is_categorical_dtype
-
 import pandas as pd
 from pandas import (
     Categorical,
@@ -1057,18 +1055,7 @@ def test_latin_encoding(self, setup_path, dtype, val):

         s_nan = ser.replace(nan_rep, np.nan)

-        if is_categorical_dtype(s_nan):
-            assert is_categorical_dtype(retr)
-            tm.assert_series_equal(
-                s_nan, retr, check_dtype=False, check_categorical=False
-            )
-        else:
-            tm.assert_series_equal(s_nan, retr)
-
-        # FIXME: don't leave commented-out
-        # fails:
-        # for x in examples:
-        #     roundtrip(s, nan_rep=b'\xf8\xfc')
+        tm.assert_series_equal(s_nan, retr)

     def test_append_some_nans(self, setup_path):
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 3efac9cd605a8..eaa92fa53d799 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -1026,7 +1026,14 @@ def test_categorical_with_stata_missing_values(self, version):
             original.to_stata(path, version=version)
             written_and_read_again = self.read_dta(path)
             res = written_and_read_again.set_index("index")
-            tm.assert_frame_equal(res, original, check_categorical=False)
+
+            expected = original.copy()
+            for col in expected:
+                cat = expected[col]._values
+                new_cats = cat.remove_unused_categories().categories
+                cat = cat.set_categories(new_cats, ordered=True)
+                expected[col] = cat
+            tm.assert_frame_equal(res, expected)

     @pytest.mark.parametrize("file", ["dta19_115", "dta19_117"])
     def test_categorical_order(self, file):
@@ -1044,7 +1051,9 @@ def test_categorical_order(self, file):
         cols = []
         for is_cat, col, labels, codes in expected:
             if is_cat:
-                cols.append((col, pd.Categorical.from_codes(codes, labels)))
+                cols.append(
+                    (col, pd.Categorical.from_codes(codes, labels, ordered=True))
+                )
             else:
                 cols.append((col, pd.Series(labels, dtype=np.float32)))
         expected = DataFrame.from_dict(dict(cols))
@@ -1052,7 +1061,7 @@ def test_categorical_order(self, file):
         # Read with and with out categoricals, ensure order is identical
         file = getattr(self, file)
         parsed = read_stata(file)
-        tm.assert_frame_equal(expected, parsed, check_categorical=False)
+        tm.assert_frame_equal(expected, parsed)

         # Check identity of codes
         for col in expected:
@@ -1137,18 +1146,30 @@ def test_read_chunks_117(
                 chunk = itr.read(chunksize)
             except StopIteration:
                 break
-            from_frame = parsed.iloc[pos : pos + chunksize, :]
+            from_frame = parsed.iloc[pos : pos + chunksize, :].copy()
+            from_frame = self._convert_categorical(from_frame)
             tm.assert_frame_equal(
-                from_frame,
-                chunk,
-                check_dtype=False,
-                check_datetimelike_compat=True,
-                check_categorical=False,
+                from_frame, chunk, check_dtype=False, check_datetimelike_compat=True,
             )

             pos += chunksize
         itr.close()

+    @staticmethod
+    def _convert_categorical(from_frame: DataFrame) -> DataFrame:
+        """
+        Emulate the categorical casting behavior we expect from roundtripping.
+        """
+        for col in from_frame:
+            ser = from_frame[col]
+            if is_categorical_dtype(ser.dtype):
+                cat = ser._values.remove_unused_categories()
+                if cat.categories.dtype == object:
+                    categories = pd.Index(cat.categories._values)
+                    cat = cat.set_categories(categories)
+                from_frame[col] = cat
+        return from_frame
+
     def test_iterator(self):

         fname = self.dta3_117
@@ -1223,13 +1244,10 @@ def test_read_chunks_115(
                 chunk = itr.read(chunksize)
             except StopIteration:
                 break
-            from_frame = parsed.iloc[pos : pos + chunksize, :]
+            from_frame = parsed.iloc[pos : pos + chunksize, :].copy()
+            from_frame = self._convert_categorical(from_frame)
             tm.assert_frame_equal(
-                from_frame,
-                chunk,
-                check_dtype=False,
-                check_datetimelike_compat=True,
-                check_categorical=False,
+                from_frame, chunk, check_dtype=False, check_datetimelike_compat=True,
             )

             pos += chunksize
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index d80e2e7afceef..51e6f80df657d 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -2077,8 +2077,7 @@ def test_merge_equal_cat_dtypes(cat_dtype, reverse):
         }
     ).set_index("foo")

-    # Categorical is unordered, so don't check ordering.
-    tm.assert_frame_equal(result, expected, check_categorical=False)
+    tm.assert_frame_equal(result, expected)


 def test_merge_equal_cat_dtypes2():
@@ -2100,8 +2099,7 @@ def test_merge_equal_cat_dtypes2():
         {"left": [1, 2], "right": [3, 2], "foo": Series(["a", "b"]).astype(cat_dtype)}
     ).set_index("foo")

-    # Categorical is unordered, so don't check ordering.
-    tm.assert_frame_equal(result, expected, check_categorical=False)
+    tm.assert_frame_equal(result, expected)


 def test_merge_on_cat_and_ext_array():
diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py
index 80a024eda7848..31f17be2fac7b 100644
--- a/pandas/tests/series/test_dtypes.py
+++ b/pandas/tests/series/test_dtypes.py
@@ -296,18 +296,18 @@ def cmp(a, b):
         # array conversion
         tm.assert_almost_equal(np.array(s), np.array(s.values))

-        # valid conversion
-        for valid in [
-            lambda x: x.astype("category"),
-            lambda x: x.astype(CategoricalDtype()),
-            lambda x: x.astype("object").astype("category"),
-            lambda x: x.astype("object").astype(CategoricalDtype()),
-        ]:
-
-            result = valid(s)
-            # compare series values
-            # internal .categories can't be compared because it is sorted
-            tm.assert_series_equal(result, s, check_categorical=False)
+        tm.assert_series_equal(s.astype("category"), s)
+        tm.assert_series_equal(s.astype(CategoricalDtype()), s)
+
+        roundtrip_expected = s.cat.set_categories(
+            s.cat.categories.sort_values()
+        ).cat.remove_unused_categories()
+        tm.assert_series_equal(
+            s.astype("object").astype("category"), roundtrip_expected
+        )
+        tm.assert_series_equal(
+            s.astype("object").astype(CategoricalDtype()), roundtrip_expected
+        )

         # invalid conversion (these are NOT a dtype)
         msg = (
From fa48f5f098b5ed49a08b9d19f072559f74e1ccc2 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Wed, 11 Mar 2020 21:30:02 -0700
Subject: [PATCH 336/388] REF: implement _get_engine_target (#32611)

---
 pandas/core/indexes/base.py      | 25 ++++++++++++++++---------
 pandas/core/indexes/extension.py |  3 +++
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 98e3b3ad258ea..cef8a39d75a4c 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -571,10 +571,10 @@ def _cleanup(self):
     def _engine(self):
         # property, for now, slow to look up

-        # to avoid a reference cycle, bind `_ndarray_values` to a local variable, so
+        # to avoid a reference cycle, bind `target_values` to a local variable, so
         # `self` is not passed into the lambda.
-        _ndarray_values = self._ndarray_values
-        return self._engine_type(lambda: _ndarray_values, len(self))
+        target_values = self._get_engine_target()
+        return self._engine_type(lambda: target_values, len(self))

     # --------------------------------------------------------------------
     # Array-Like Methods
@@ -2976,7 +2976,7 @@ def get_indexer(
                 "backfill or nearest reindexing"
             )

-        indexer = self._engine.get_indexer(target._ndarray_values)
+        indexer = self._engine.get_indexer(target._get_engine_target())

         return ensure_platform_int(indexer)

@@ -2990,19 +2990,20 @@ def _convert_tolerance(self, tolerance, target):
     def _get_fill_indexer(
         self, target: "Index", method: str_t, limit=None, tolerance=None
     ) -> np.ndarray:
+
+        target_values = target._get_engine_target()
+
         if self.is_monotonic_increasing and target.is_monotonic_increasing:
             engine_method = (
                 self._engine.get_pad_indexer
                 if method == "pad"
                 else self._engine.get_backfill_indexer
             )
-            indexer = engine_method(target._ndarray_values, limit)
+            indexer = engine_method(target_values, limit)
         else:
             indexer = self._get_fill_indexer_searchsorted(target, method, limit)
         if tolerance is not None:
-            indexer = self._filter_indexer_tolerance(
-                target._ndarray_values, indexer, tolerance
-            )
+            indexer = self._filter_indexer_tolerance(target_values, indexer, tolerance)
         return indexer

     def _get_fill_indexer_searchsorted(
@@ -3915,6 +3916,12 @@ def _internal_get_values(self) -> np.ndarray:
         """
         return self.values

+    def _get_engine_target(self) -> np.ndarray:
+        """
+        Get the ndarray that we can pass to the IndexEngine constructor.
+        """
+        return self._values
+
     @Appender(IndexOpsMixin.memory_usage.__doc__)
     def memory_usage(self, deep: bool = False) -> int:
         result = super().memory_usage(deep=deep)
@@ -4657,7 +4664,7 @@ def get_indexer_non_unique(self, target):
         elif self.is_all_dates and target.is_all_dates:  # GH 30399
             tgt_values = target.asi8
         else:
-            tgt_values = target._ndarray_values
+            tgt_values = target._get_engine_target()

         indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
         return ensure_platform_int(indexer), missing
diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py
index 7b11df15f69fb..4984fc27516ff 100644
--- a/pandas/core/indexes/extension.py
+++ b/pandas/core/indexes/extension.py
@@ -231,6 +231,9 @@ def __array__(self, dtype=None) -> np.ndarray:
     def _ndarray_values(self) -> np.ndarray:
         return self._data._ndarray_values

+    def _get_engine_target(self) -> np.ndarray:
+        return self._data._values_for_argsort()
+
     @Appender(Index.dropna.__doc__)
     def dropna(self, how="any"):
         if how not in ("any", "all"):
From da67e1ed07382ecd0d462ecde6d1b642c7ba01b4 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Wed, 11 Mar 2020 21:44:24 -0700
Subject: [PATCH 337/388] CLN: avoid values_from_object in reshape.merge
 (#32537)

---
 pandas/core/reshape/merge.py | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index e75dced21f488..aeec2a43f39bf 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -45,6 +45,7 @@
 import pandas.core.algorithms as algos
 from pandas.core.arrays.categorical import _recode_for_categories
 import pandas.core.common as com
+from pandas.core.construction import extract_array
 from pandas.core.frame import _merge_doc
 from pandas.core.internals import concatenate_block_managers
 from pandas.core.sorting import is_int64_overflow_possible
@@ -1820,9 +1821,14 @@ def _right_outer_join(x, y, max_groups):

 def _factorize_keys(lk, rk, sort=True):
     # Some pre-processing for non-ndarray lk / rk
-    if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk):
-        lk = getattr(lk, "_values", lk)._data
-        rk = getattr(rk, "_values", rk)._data
+    lk = extract_array(lk, extract_numpy=True)
+    rk = extract_array(rk, extract_numpy=True)
+
+    if is_datetime64tz_dtype(lk.dtype) and is_datetime64tz_dtype(rk.dtype):
+        # Extract the ndarray (UTC-localized) values
+        # Note: we dont need the dtypes to match, as these can still be compared
+        lk, _ = lk._values_for_factorize()
+        rk, _ = rk._values_for_factorize()

     elif (
         is_categorical_dtype(lk) and is_categorical_dtype(rk) and lk.is_dtype_equal(rk)
@@ -1837,11 +1843,7 @@ def _factorize_keys(lk, rk, sort=True):
         lk = ensure_int64(lk.codes)
         rk = ensure_int64(rk)

-    elif (
-        is_extension_array_dtype(lk.dtype)
-        and is_extension_array_dtype(rk.dtype)
-        and lk.dtype == rk.dtype
-    ):
+    elif is_extension_array_dtype(lk.dtype) and is_dtype_equal(lk.dtype, rk.dtype):
         lk, _ = lk._values_for_factorize()
         rk, _ = rk._values_for_factorize()

@@ -1849,15 +1851,15 @@ def _factorize_keys(lk, rk, sort=True):
         # GH#23917 TODO: needs tests for case where lk is integer-dtype
         #  and rk is datetime-dtype
         klass = libhashtable.Int64Factorizer
-        lk = ensure_int64(com.values_from_object(lk))
-        rk = ensure_int64(com.values_from_object(rk))
-    elif issubclass(lk.dtype.type, (np.timedelta64, np.datetime64)) and issubclass(
-        rk.dtype.type, (np.timedelta64, np.datetime64)
-    ):
+        lk = ensure_int64(np.asarray(lk))
+        rk = ensure_int64(np.asarray(rk))
+
+    elif needs_i8_conversion(lk.dtype) and is_dtype_equal(lk.dtype, rk.dtype):
         # GH#23917 TODO: Needs tests for non-matching dtypes
         klass = libhashtable.Int64Factorizer
-        lk = ensure_int64(com.values_from_object(lk))
-        rk = ensure_int64(com.values_from_object(rk))
+        lk = ensure_int64(np.asarray(lk, dtype=np.int64))
+        rk = ensure_int64(np.asarray(rk, dtype=np.int64))
+
     else:
         klass = libhashtable.Factorizer
         lk = ensure_object(lk)
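At the user level, one of the paths the reworked ``_factorize_keys`` above handles is merging on tz-aware datetime keys, which are factorized via their UTC values internally (a sketch)::

    import pandas as pd

    key = pd.date_range("2020-01-01", periods=3, tz="US/Eastern")
    left = pd.DataFrame({"key": key, "lval": [1, 2, 3]})
    right = pd.DataFrame({"key": key[::-1], "rval": [4, 5, 6]})

    print(pd.merge(left, right, on="key"))
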
700d8d503d086..363286704ba95 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -98,6 +98,7 @@ def _gotitem(self, key, ndim, subset=None): [ "all", "any", + "corrwith", "count", "first", "idxmax", @@ -132,7 +133,6 @@ def _gotitem(self, key, ndim, subset=None): [ "backfill", "bfill", - "corrwith", "cumcount", "cummax", "cummin", diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 9b07269811d8e..9ea5252b91e13 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1262,6 +1262,9 @@ def test_series_groupby_on_2_categoricals_unobserved( if reduction_func == "ngroup": pytest.skip("ngroup is not truly a reduction") + if reduction_func == "corrwith": # GH 32293 + pytest.xfail("TODO: implement SeriesGroupBy.corrwith") + df = pd.DataFrame( { "cat_1": pd.Categorical(list("AABB"), categories=list("ABCD")), diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 740103eec185a..2295eb2297fa6 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -327,9 +327,9 @@ def test_transform_transformation_func(transformation_func): } ) - if transformation_func in ["pad", "backfill", "tshift", "corrwith", "cumcount"]: + if transformation_func in ["pad", "backfill", "tshift", "cumcount"]: # These transformation functions are not yet covered in this test - pytest.xfail("See GH 31269 and GH 31270") + pytest.xfail("See GH 31269") elif _is_numpy_dev and transformation_func in ["cummin"]: pytest.xfail("https://github.com/pandas-dev/pandas/issues/31992") elif transformation_func == "fillna": @@ -1093,8 +1093,10 @@ def test_transform_agg_by_name(reduction_func, obj): pytest.xfail("TODO: g.transform('ngroup') doesn't work") if func == "size": # GH#27469 pytest.xfail("TODO: g.transform('size') doesn't work") + if func == "corrwith" and isinstance(obj, Series): # GH#32293 + pytest.xfail("TODO: implement SeriesGroupBy.corrwith") - args = {"nth": [0], "quantile": [0.5]}.get(func, []) + args = {"nth": [0], "quantile": [0.5], "corrwith": [obj]}.get(func, []) result = g.transform(func, *args) From f3859cbadc2b705317dabd99debc94b474ed56c6 Mon Sep 17 00:00:00 2001 From: Joy Bhalla Date: Thu, 12 Mar 2020 10:19:23 +0530 Subject: [PATCH 340/388] DOC: Fix capitalization of the word pandas in the docs (#32439) --- doc/source/development/code_style.rst | 8 +-- doc/source/development/contributing.rst | 68 +++++++++---------- .../development/contributing_docstring.rst | 4 +- doc/source/development/extending.rst | 10 +-- doc/source/development/internals.rst | 2 +- doc/source/development/maintaining.rst | 4 +- doc/source/development/policies.rst | 10 +-- doc/source/development/roadmap.rst | 10 +-- 8 files changed, 58 insertions(+), 58 deletions(-) diff --git a/doc/source/development/code_style.rst b/doc/source/development/code_style.rst index 1b223cf5f026b..fa7532a68a06d 100644 --- a/doc/source/development/code_style.rst +++ b/doc/source/development/code_style.rst @@ -21,9 +21,9 @@ Patterns foo.__class__ ------------- -*pandas* uses 'type(foo)' instead 'foo.__class__' as it makes the code more -readable.
+pandas uses f-strings formatting instead of '%' and '.format()' string formatters. The convention of using f-strings on a string that is concatenated over several lines, is to prefix only the lines containing values which need to be interpreted. @@ -114,7 +114,7 @@ For example: Representation function (aka 'repr()') -------------------------------------- -*pandas* uses 'repr()' instead of '%r' and '!r'. +pandas uses 'repr()' instead of '%r' and '!r'. The use of 'repr()' will only happen when the value is not an obvious string. diff --git a/doc/source/development/contributing.rst b/doc/source/development/contributing.rst index db9e23035b977..88782701b096c 100644 --- a/doc/source/development/contributing.rst +++ b/doc/source/development/contributing.rst @@ -53,7 +53,7 @@ Feel free to ask questions on the `mailing list Bug reports and enhancement requests ==================================== -Bug reports are an important part of making *pandas* more stable. Having a complete bug report +Bug reports are an important part of making pandas more stable. Having a complete bug report will allow others to reproduce the bug and provide insight into fixing. See `this stackoverflow article `_ and `this blogpost `_ @@ -75,14 +75,14 @@ Bug reports must: ... ``` -#. Include the full version string of *pandas* and its dependencies. You can use the built-in function:: +#. Include the full version string of pandas and its dependencies. You can use the built-in function:: >>> import pandas as pd >>> pd.show_versions() #. Explain why the current behavior is wrong/not desired and what you expect instead. -The issue will then show up to the *pandas* community and be open to comments/ideas from others. +The issue will then show up to the pandas community and be open to comments/ideas from others. .. _contributing.github: @@ -90,14 +90,14 @@ Working with the code ===================== Now that you have an issue you want to fix, enhancement to add, or documentation to improve, -you need to learn how to work with GitHub and the *pandas* code base. +you need to learn how to work with GitHub and the pandas code base. .. _contributing.version_control: Version control, Git, and GitHub -------------------------------- -To the new user, working with Git is one of the more daunting aspects of contributing to *pandas*. +To the new user, working with Git is one of the more daunting aspects of contributing to pandas. It can very quickly become overwhelming, but sticking to the guidelines below will help keep the process straightforward and mostly trouble free. As always, if you are having difficulties please feel free to ask for help. @@ -221,7 +221,7 @@ environment: `_ * Make sure your conda is up to date (``conda update conda``) * Make sure that you have :ref:`cloned the repository ` -* ``cd`` to the *pandas* source directory +* ``cd`` to the pandas source directory We'll now kick off a three-step process: @@ -330,7 +330,7 @@ The above can be simplified to:: This changes your working directory to the shiny-new-feature branch. Keep any changes in this branch specific to one bug or feature so it is clear -what the branch brings to *pandas*. You can have many shiny-new-features +what the branch brings to pandas. You can have many shiny-new-features and switch in between them using the git checkout command. 
When creating this branch, make sure your master branch is up to date with @@ -349,9 +349,9 @@ you created the branch, check the section on Contributing to the documentation ================================= -Contributing to the documentation benefits everyone who uses *pandas*. +Contributing to the documentation benefits everyone who uses pandas. We encourage you to help us improve the documentation, and -you don't have to be an expert on *pandas* to do so! In fact, +you don't have to be an expert on pandas to do so! In fact, there are sections of the docs that are worse off after being written by experts. If something in the docs doesn't make sense to you, updating the relevant section after you figure it out is a great way to ensure it will help @@ -361,7 +361,7 @@ the next person. :local: -About the *pandas* documentation +About the pandas documentation -------------------------------- The documentation is written in **reStructuredText**, which is almost like writing @@ -372,7 +372,7 @@ complex changes to the documentation as well. Some other important things to know about the docs: -* The *pandas* documentation consists of two parts: the docstrings in the code +* The pandas documentation consists of two parts: the docstrings in the code itself and the docs in this folder ``doc/``. The docstrings provide a clear explanation of the usage of the individual @@ -452,7 +452,7 @@ This will identify methods documented in ``doc/source/reference`` that are not a class methods, and existing methods that are not documented in ``doc/source/reference``. -Updating a *pandas* docstring +Updating a pandas docstring ----------------------------- When improving a single function or method's docstring, it is not necessarily @@ -477,7 +477,7 @@ When doing a PR with a docstring update, it is good to post the output of the validation script in a comment on github. -How to build the *pandas* documentation +How to build the pandas documentation --------------------------------------- Requirements @@ -543,7 +543,7 @@ And you'll have the satisfaction of seeing your new and improved documentation! Building master branch documentation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -When pull requests are merged into the *pandas* ``master`` branch, the main parts of +When pull requests are merged into the pandas ``master`` branch, the main parts of the documentation are also built by Travis-CI. These docs are then hosted `here `__, see also the :ref:`Continuous Integration ` section. @@ -563,7 +563,7 @@ Writing good code is not just about what you write. It is also about *how* you write it. During :ref:`Continuous Integration ` testing, several tools will be run to check your code for stylistic errors. Generating any warnings will cause the test to fail. -Thus, good style is a requirement for submitting code to *pandas*. +Thus, good style is a requirement for submitting code to pandas. There is a tool in pandas to help contributors verify their changes before contributing them to the project:: @@ -601,7 +601,7 @@ set in the ``pandas.compat._optional.VERSIONS`` dict. C (cpplint) ~~~~~~~~~~~ -*pandas* uses the `Google `_ +pandas uses the `Google `_ standard. Google provides an open source style checker called ``cpplint``, but we use a fork of it that can be found `here `__. Here are *some* of the more common ``cpplint`` issues: @@ -652,7 +652,7 @@ fixes manually. 
Python (PEP8 / black) ~~~~~~~~~~~~~~~~~~~~~ -*pandas* follows the `PEP8 `_ standard +pandas follows the `PEP8 `_ standard and uses `Black `_ and `Flake8 `_ to ensure a consistent code format throughout the project. @@ -703,7 +703,7 @@ Note that these commands can be run analogously with ``black``. Import formatting ~~~~~~~~~~~~~~~~~ -*pandas* uses `isort `__ to standardise import +pandas uses `isort `__ to standardise import formatting across the codebase. A guide to import layout as per pep8 can be found `here `__. @@ -778,7 +778,7 @@ Note that if needed, you can skip these checks with ``git commit --no-verify``. Backwards compatibility ~~~~~~~~~~~~~~~~~~~~~~~ -Please try to maintain backward compatibility. *pandas* has lots of users with lots of +Please try to maintain backward compatibility. pandas has lots of users with lots of existing code, so don't break it if at all possible. If you think breakage is required, clearly state why as part of the pull request. Also, be careful when changing method signatures and add deprecation warnings where needed. Also, add the deprecated sphinx @@ -825,7 +825,7 @@ See :ref:`contributing.warnings` for more. Type hints ---------- -*pandas* strongly encourages the use of :pep:`484` style type hints. New development should contain type hints and pull requests to annotate existing code are accepted as well! +pandas strongly encourages the use of :pep:`484` style type hints. New development should contain type hints and pull requests to annotate existing code are accepted as well! Style guidelines ~~~~~~~~~~~~~~~~ @@ -906,9 +906,9 @@ With custom types and inference this is not always possible so exceptions are ma pandas-specific types ~~~~~~~~~~~~~~~~~~~~~ -Commonly used types specific to *pandas* will appear in `pandas._typing `_ and you should use these where applicable. This module is private for now but ultimately this should be exposed to third party libraries who want to implement type checking against pandas. +Commonly used types specific to pandas will appear in `pandas._typing `_ and you should use these where applicable. This module is private for now but ultimately this should be exposed to third party libraries who want to implement type checking against pandas. -For example, quite a few functions in *pandas* accept a ``dtype`` argument. This can be expressed as a string like ``"object"``, a ``numpy.dtype`` like ``np.int64`` or even a pandas ``ExtensionDtype`` like ``pd.CategoricalDtype``. Rather than burden the user with having to constantly annotate all of those options, this can simply be imported and reused from the pandas._typing module +For example, quite a few functions in pandas accept a ``dtype`` argument. This can be expressed as a string like ``"object"``, a ``numpy.dtype`` like ``np.int64`` or even a pandas ``ExtensionDtype`` like ``pd.CategoricalDtype``. Rather than burden the user with having to constantly annotate all of those options, this can simply be imported and reused from the pandas._typing module .. code-block:: python @@ -922,7 +922,7 @@ This module will ultimately house types for repeatedly used concepts like "path- Validating type hints ~~~~~~~~~~~~~~~~~~~~~ -*pandas* uses `mypy `_ to statically analyze the code base and type hints. After making any change you can ensure your type hints are correct by running +pandas uses `mypy `_ to statically analyze the code base and type hints. After making any change you can ensure your type hints are correct by running .. 
code-block:: shell @@ -933,7 +933,7 @@ Validating type hints Testing with continuous integration ----------------------------------- -The *pandas* test suite will run automatically on `Travis-CI `__ and +The pandas test suite will run automatically on `Travis-CI `__ and `Azure Pipelines `__ continuous integration services, once your pull request is submitted. However, if you wish to run the test suite on a branch prior to submitting the pull request, @@ -959,7 +959,7 @@ This is an example of a green build. Test-driven development/code writing ------------------------------------ -*pandas* is serious about testing and strongly encourages contributors to embrace +pandas is serious about testing and strongly encourages contributors to embrace `test-driven development (TDD) `_. This development process "relies on the repetition of a very short development cycle: first the developer writes an (initially failing) automated test case that defines a desired @@ -968,10 +968,10 @@ So, before actually writing any code, you should write your tests. Often the te taken from the original GitHub issue. However, it is always worth considering additional use cases and writing corresponding tests. -Adding tests is one of the most common requests after code is pushed to *pandas*. Therefore, +Adding tests is one of the most common requests after code is pushed to pandas. Therefore, it is worth getting in the habit of writing tests ahead of time so this is never an issue. -Like many packages, *pandas* uses `pytest +Like many packages, pandas uses `pytest `_ and the convenient extensions in `numpy.testing `_. @@ -1018,7 +1018,7 @@ E.g. "# brief comment, see GH#28907" Transitioning to ``pytest`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*pandas* existing test structure is *mostly* class-based, meaning that you will typically find tests wrapped in a class. +pandas existing test structure is *mostly* class-based, meaning that you will typically find tests wrapped in a class. .. code-block:: python @@ -1220,7 +1220,7 @@ Running the test suite ---------------------- The tests can then be run directly inside your Git clone (without having to -install *pandas*) by typing:: +install pandas) by typing:: pytest pandas @@ -1272,9 +1272,9 @@ Running the performance test suite ---------------------------------- Performance matters and it is worth considering whether your code has introduced -performance regressions. *pandas* is in the process of migrating to +performance regressions. pandas is in the process of migrating to `asv benchmarks `__ -to enable easy monitoring of the performance of critical *pandas* operations. +to enable easy monitoring of the performance of critical pandas operations. These benchmarks are all found in the ``pandas/asv_bench`` directory. asv supports both python2 and python3. @@ -1361,7 +1361,7 @@ directive. This should also be put in the docstring when adding a new function or method (`example `__) or a new keyword argument (`example `__). -Contributing your changes to *pandas* +Contributing your changes to pandas ===================================== .. _contributing.commit-code: @@ -1386,7 +1386,7 @@ Doing 'git status' again should give something like:: # modified: /relative/path/to/file-you-added.py # -Finally, commit your changes to your local repository with an explanatory message. *Pandas* +Finally, commit your changes to your local repository with an explanatory message. pandas uses a convention for commit message prefixes and layout. 
Here are some common prefixes along with general guidelines for when to use them: @@ -1434,7 +1434,7 @@ like:: upstream git://github.com/pandas-dev/pandas.git (fetch) upstream git://github.com/pandas-dev/pandas.git (push) -Now your code is on GitHub, but it is not yet a part of the *pandas* project. For that to +Now your code is on GitHub, but it is not yet a part of the pandas project. For that to happen, a pull request needs to be submitted on GitHub. Review your code diff --git a/doc/source/development/contributing_docstring.rst b/doc/source/development/contributing_docstring.rst index 1c99b341f6c5a..4cc21bbac1b10 100644 --- a/doc/source/development/contributing_docstring.rst +++ b/doc/source/development/contributing_docstring.rst @@ -80,7 +80,7 @@ about reStructuredText can be found in: * `Quick reStructuredText reference `_ * `Full reStructuredText specification `_ -Pandas has some helpers for sharing docstrings between related classes, see +pandas has some helpers for sharing docstrings between related classes, see :ref:`docstring.sharing`. The rest of this document will summarize all the above guides, and will @@ -932,7 +932,7 @@ plot will be generated automatically when building the documentation. Sharing docstrings ------------------ -Pandas has a system for sharing docstrings, with slight variations, between +pandas has a system for sharing docstrings, with slight variations, between classes. This helps us keep docstrings consistent, while keeping things clear for the user reading. It comes at the cost of some complexity when writing. diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index 270f20e8118bc..98e3ffcf74ad1 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -7,7 +7,7 @@ Extending pandas **************** While pandas provides a rich set of methods, containers, and data types, your -needs may not be fully satisfied. Pandas offers a few options for extending +needs may not be fully satisfied. pandas offers a few options for extending pandas. .. _extending.register-accessors: @@ -80,8 +80,8 @@ Extension types The :class:`pandas.api.extensions.ExtensionDtype` and :class:`pandas.api.extensions.ExtensionArray` APIs are new and experimental. They may change between versions without warning. -Pandas defines an interface for implementing data types and arrays that *extend* -NumPy's type system. Pandas itself uses the extension system for some types +pandas defines an interface for implementing data types and arrays that *extend* +NumPy's type system. pandas itself uses the extension system for some types that aren't built into NumPy (categorical, period, interval, datetime with timezone). @@ -122,7 +122,7 @@ This class provides all the array-like functionality. ExtensionArrays are limited to 1 dimension. An ExtensionArray is linked to an ExtensionDtype via the ``dtype`` attribute. -Pandas makes no restrictions on how an extension array is created via its +pandas makes no restrictions on how an extension array is created via its ``__new__`` or ``__init__``, and puts no restrictions on how you store your data. We do require that your array be convertible to a NumPy array, even if this is relatively expensive (as it is for ``Categorical``). @@ -224,7 +224,7 @@ for an example. As part of your implementation, we require that you defer to pandas when a pandas container (:class:`Series`, :class:`DataFrame`, :class:`Index`) is detected in ``inputs``. 
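A condensed sketch of the deferral rule just described (the class below is a made-up example, not from the pandas docs):

    import pandas as pd

    class MyExtensionArray:
        def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
            # Defer whenever a pandas container shows up; pandas unboxes the
            # array and re-calls the ufunc with the unwrapped input.
            if any(isinstance(x, (pd.Series, pd.DataFrame, pd.Index)) for x in inputs):
                return NotImplemented
            ...  # otherwise handle the ufunc on the raw values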
-If any of those is present, you should return ``NotImplemented``. Pandas will take care of +If any of those is present, you should return ``NotImplemented``. pandas will take care of unboxing the array from the container and re-calling the ufunc with the unwrapped input. .. _extending.extension.testing: diff --git a/doc/source/development/internals.rst b/doc/source/development/internals.rst index 748caae295460..4ad045a91b5fe 100644 --- a/doc/source/development/internals.rst +++ b/doc/source/development/internals.rst @@ -85,7 +85,7 @@ if you compute the levels and codes yourself, please be careful. Values ~~~~~~ -Pandas extends NumPy's type system with custom types, like ``Categorical`` or +pandas extends NumPy's type system with custom types, like ``Categorical`` or datetimes with a timezone, so we have multiple notions of "values". For 1-D containers (``Index`` classes and ``Series``) we have the following convention: diff --git a/doc/source/development/maintaining.rst b/doc/source/development/maintaining.rst index e65b66fc243c5..9ae9d47b89341 100644 --- a/doc/source/development/maintaining.rst +++ b/doc/source/development/maintaining.rst @@ -13,7 +13,7 @@ The main contributing guide is available at :ref:`contributing`. Roles ----- -Pandas uses two levels of permissions: **triage** and **core** team members. +pandas uses two levels of permissions: **triage** and **core** team members. Triage members can label and close issues and pull requests. @@ -25,7 +25,7 @@ GitHub publishes the full `list of permissions`_. Tasks ----- -Pandas is largely a volunteer project, so these tasks shouldn't be read as +pandas is largely a volunteer project, so these tasks shouldn't be read as "expectations" of triage and maintainers. Rather, they're general descriptions of what it means to be a maintainer. diff --git a/doc/source/development/policies.rst b/doc/source/development/policies.rst index 224948738341e..b7cc3db3ad260 100644 --- a/doc/source/development/policies.rst +++ b/doc/source/development/policies.rst @@ -11,7 +11,7 @@ Version Policy .. versionchanged:: 1.0.0 -Pandas uses a loose variant of semantic versioning (`SemVer`_) to govern +pandas uses a loose variant of semantic versioning (`SemVer`_) to govern deprecations, API compatibility, and version numbering. A pandas release number is made up of ``MAJOR.MINOR.PATCH``. @@ -23,7 +23,7 @@ and how to migrate existing code to the new behavior. Whenever possible, a deprecation path will be provided rather than an outright breaking change. -Pandas will introduce deprecations in **minor** releases. These deprecations +pandas will introduce deprecations in **minor** releases. These deprecations will preserve the existing behavior while emitting a warning that provide guidance on: @@ -39,19 +39,19 @@ deprecation removed in the next next major release (2.0.0). .. note:: - Pandas will sometimes make *behavior changing* bug fixes, as part of + pandas will sometimes make *behavior changing* bug fixes, as part of minor or patch releases. Whether or not a change is a bug fix or an API-breaking change is a judgement call. We'll do our best, and we invite you to participate in development discussion on the issue tracker or mailing list. These policies do not apply to features marked as **experimental** in the documentation. -Pandas may change the behavior of experimental features at any time. +pandas may change the behavior of experimental features at any time. Python Support ~~~~~~~~~~~~~~ -Pandas will only drop support for specific Python versions (e.g. 
3.6.x, 3.7.x) in +pandas will only drop support for specific Python versions (e.g. 3.6.x, 3.7.x) in pandas **major** releases. .. _SemVer: https://semver.org diff --git a/doc/source/development/roadmap.rst b/doc/source/development/roadmap.rst index fafe63d80249c..e57ff82add278 100644 --- a/doc/source/development/roadmap.rst +++ b/doc/source/development/roadmap.rst @@ -22,8 +22,8 @@ See :ref:`roadmap.evolution` for proposing changes to this document. Extensibility ------------- -Pandas :ref:`extending.extension-types` allow for extending NumPy types with custom -data types and array storage. Pandas uses extension types internally, and provides +pandas :ref:`extending.extension-types` allow for extending NumPy types with custom +data types and array storage. pandas uses extension types internally, and provides an interface for 3rd-party libraries to define their own custom data types. Many parts of pandas still unintentionally convert data to a NumPy array. @@ -71,7 +71,7 @@ Block manager rewrite We'd like to replace pandas current internal data structures (a collection of 1 or 2-D arrays) with a simpler collection of 1-D arrays. -Pandas internal data model is quite complex. A DataFrame is made up of +pandas internal data model is quite complex. A DataFrame is made up of one or more 2-dimensional "blocks", with one or more blocks per dtype. This collection of 2-D arrays is managed by the BlockManager. @@ -132,7 +132,7 @@ Some specific goals include Performance monitoring ---------------------- -Pandas uses `airspeed velocity `__ to +pandas uses `airspeed velocity `__ to monitor for performance regressions. ASV itself is a fabulous tool, but requires some additional work to be integrated into an open source project's workflow. @@ -155,7 +155,7 @@ We'd like to fund improvements and maintenance of these tools to Roadmap Evolution ----------------- -Pandas continues to evolve. The direction is primarily determined by community +pandas continues to evolve. The direction is primarily determined by community interest. Everyone is welcome to review existing items on the roadmap and to propose a new item. From 940bbb599bcaebb191e8ed726aae88f1cb22757e Mon Sep 17 00:00:00 2001 From: gabrielvf1 Date: Thu, 12 Mar 2020 01:52:29 -0300 Subject: [PATCH 341/388] TST/VIZ: add test for legend colors for DataFrame with duplicate column labels #11136 (#32451) --- pandas/tests/plotting/test_frame.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index ffbd135466709..32673b9a0a5cf 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -3316,6 +3316,16 @@ def test_missing_markers_legend_using_style(self): self._check_legend_labels(ax, labels=["A", "B", "C"]) self._check_legend_marker(ax, expected_markers=[".", ".", "."]) + def test_colors_of_columns_with_same_name(self): + # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 + # Creating a DataFrame with duplicate column labels and testing colors of them. 
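A standalone sketch of what this new test checks (it mirrors the diff that follows and needs matplotlib installed):

    import pandas as pd

    df = pd.concat(
        [pd.DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}), pd.DataFrame({"a": [2, 4, 6]})],
        axis=1,
    )
    ax = df.plot()
    # every legend handle should share its line's color, even for the duplicated "a"
    for handle, line in zip(ax.get_legend().legendHandles, ax.lines):
        assert handle.get_color() == line.get_color()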
+ df = pd.DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) + df1 = pd.DataFrame({"a": [2, 4, 6]}) + df_concat = pd.concat([df, df1], axis=1) + result = df_concat.plot() + for legend, line in zip(result.get_legend().legendHandles, result.lines): + assert legend.get_color() == line.get_color() + def _generate_4_axes_via_gridspec(): import matplotlib.pyplot as plt From ec773416951820302039fd256d39ac4630c6c7d1 Mon Sep 17 00:00:00 2001 From: Arkadeep Adhikari Date: Thu, 12 Mar 2020 00:15:29 -0700 Subject: [PATCH 342/388] DOC: Remove absolute urls from the docs (#32539) --- doc/source/getting_started/comparison/comparison_with_sql.rst | 2 +- doc/source/user_guide/cookbook.rst | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/source/getting_started/comparison/comparison_with_sql.rst b/doc/source/getting_started/comparison/comparison_with_sql.rst index 6a03c06de3699..e3c8f8f5ccbcd 100644 --- a/doc/source/getting_started/comparison/comparison_with_sql.rst +++ b/doc/source/getting_started/comparison/comparison_with_sql.rst @@ -75,7 +75,7 @@ Filtering in SQL is done via a WHERE clause. LIMIT 5; DataFrames can be filtered in multiple ways; the most intuitive of which is using -`boolean indexing `_. +:ref:`boolean indexing ` .. ipython:: python diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 4afdb14e5c39e..e51b5c9097951 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -794,8 +794,7 @@ The :ref:`Resample ` docs. `Time grouping with some missing values `__ -`Valid frequency arguments to Grouper -`__ +Valid frequency arguments to Grouper :ref:`Timeseries ` `Grouping using a MultiIndex `__ From 9060d888fb679840ba7b9d55236d664d93c50ef0 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Thu, 12 Mar 2020 10:29:48 +0000 Subject: [PATCH 343/388] PERF: copy cached attributes on extension index shallow_copy (#32640) --- doc/source/whatsnew/v1.1.0.rst | 10 +++++----- pandas/core/indexes/category.py | 12 ++++-------- pandas/core/indexes/datetimelike.py | 5 ++++- pandas/core/indexes/datetimes.py | 1 + pandas/core/indexes/interval.py | 12 ++++++++---- pandas/core/indexes/period.py | 1 + pandas/core/indexes/timedeltas.py | 1 + 7 files changed, 24 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 035d8da74d1c1..5b6f70be478c2 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -89,9 +89,9 @@ Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - :meth:`DataFrame.swaplevels` now raises a ``TypeError`` if the axis is not a :class:`MultiIndex`. Previously a ``AttributeError`` was raised (:issue:`31126`) -- :meth:`DataFrameGroupby.mean` and :meth:`SeriesGroupby.mean` (and similarly for :meth:`~DataFrameGroupby.median`, :meth:`~DataFrameGroupby.std`` and :meth:`~DataFrameGroupby.var``) +- :meth:`DataFrameGroupby.mean` and :meth:`SeriesGroupby.mean` (and similarly for :meth:`~DataFrameGroupby.median`, :meth:`~DataFrameGroupby.std` and :meth:`~DataFrameGroupby.var`) now raise a ``TypeError`` if a not-accepted keyword argument is passed into it. 
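A short sketch of the stricter behavior this note describes (the frame below is made up):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 1, 2], "b": [3.0, 4.0, 5.0]})
    df.groupby("a").mean()       # fine
    df.groupby("a").mean(foo=1)  # now raises TypeError (was UnsupportedFunctionCall)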
- Previously a ``UnsupportedFunctionCall`` was raised (``AssertionError`` if ``min_count`` passed into :meth:`~DataFrameGroupby.median``) (:issue:`31485`) + Previously a ``UnsupportedFunctionCall`` was raised (``AssertionError`` if ``min_count`` passed into :meth:`~DataFrameGroupby.median`) (:issue:`31485`) - :meth:`DataFrame.at` and :meth:`Series.at` will raise a ``TypeError`` instead of a ``ValueError`` if an incompatible key is passed, and ``KeyError`` if a missing key is passed, matching the behavior of ``.loc[]`` (:issue:`31722`) - Passing an integer dtype other than ``int64`` to ``np.array(period_index, dtype=...)`` will now raise ``TypeError`` instead of incorrectly using ``int64`` (:issue:`32255`) - @@ -188,9 +188,9 @@ Performance improvements - Performance improvement in :class:`Timedelta` constructor (:issue:`30543`) - Performance improvement in :class:`Timestamp` constructor (:issue:`30543`) - Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`) -- The internal :meth:`Index._shallow_copy` now copies cached attributes over to the new index, - avoiding creating these again on the new index. This can speed up many operations - that depend on creating copies of existing indexes (:issue:`28584`) +- The internal index method :meth:`~Index._shallow_copy` now copies cached attributes over to the new index, + avoiding creating these again on the new index. This can speed up many operations that depend on creating copies of + existing indexes (:issue:`28584`, :issue:`32640`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 5997843f7ac6d..6388f2007cb12 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -233,6 +233,7 @@ def _simple_new(cls, values: Categorical, name: Label = None): result._data = values result.name = name + result._cache = {} result._reset_identity() result._no_setting_name = False @@ -242,14 +243,9 @@ def _simple_new(cls, values: Categorical, name: Label = None): @Appender(Index._shallow_copy.__doc__) def _shallow_copy(self, values=None, name: Label = no_default): - name = self.name if name is no_default else name - - if values is None: - values = self.values - - cat = Categorical(values, dtype=self.dtype) - - return type(self)._simple_new(cat, name=name) + if values is not None: + values = Categorical(values, dtype=self.dtype) + return super()._shallow_copy(values=values, name=name) def _is_dtype_compat(self, other) -> bool: """ diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 7889829516f7f..bf35c85ac8ed5 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -617,6 +617,7 @@ def _set_freq(self, freq): def _shallow_copy(self, values=None, name: Label = lib.no_default): name = self.name if name is lib.no_default else name + cache = self._cache.copy() if values is None else {} if values is None: values = self._data @@ -635,7 +636,9 @@ def _shallow_copy(self, values=None, name: Label = lib.no_default): del attributes["freq"] attributes["name"] = name - return type(self)._simple_new(values, **attributes) + result = self._simple_new(values, **attributes) + result._cache = cache + return result # -------------------------------------------------------------------- # Set Operation Methods diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 
185ad8e4c365a..c8035a9de432b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -268,6 +268,7 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): result = object.__new__(cls) result._data = dtarr result.name = name + result._cache = {} result._no_setting_name = False # For groupby perf. See note in indexes/base about _index_data result._index_data = dtarr._data diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index efdaf5a331f5f..80d58de6f1437 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -243,6 +243,7 @@ def _simple_new(cls, array: IntervalArray, name: Label = None): result = IntervalMixin.__new__(cls) result._data = array result.name = name + result._cache = {} result._no_setting_name = False result._reset_identity() return result @@ -332,12 +333,15 @@ def from_tuples( # -------------------------------------------------------------------- @Appender(Index._shallow_copy.__doc__) - def _shallow_copy(self, values=None, **kwargs): + def _shallow_copy(self, values=None, name: Label = lib.no_default): + name = self.name if name is lib.no_default else name + cache = self._cache.copy() if values is None else {} if values is None: values = self._data - attributes = self._get_attributes_dict() - attributes.update(kwargs) - return self._simple_new(values, **attributes) + + result = self._simple_new(values, name=name) + result._cache = cache + return result @cache_readonly def _isnan(self): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 9ab80c0b6145c..8ae792d3f63b5 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -233,6 +233,7 @@ def _simple_new(cls, values: PeriodArray, name: Label = None): # For groupby perf. See note in indexes/base about _index_data result._index_data = values._data result.name = name + result._cache = {} result._reset_identity() return result diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 5e4a8e83bd95b..a6d9d4dfc330b 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -180,6 +180,7 @@ def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE): result = object.__new__(cls) result._data = values result._name = name + result._cache = {} # For groupby perf. 
See note in indexes/base about _index_data result._index_data = values._data From 9c85af8621742d4eeb87bacf423ea45488889150 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Thu, 12 Mar 2020 13:18:20 +0100 Subject: [PATCH 344/388] BUG: pd.ExcelFile closes stream on destruction (#32544) * FIX: pandas.ExcelFile should not close stream passed as parameter on destruction Regression test added --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/io/excel/_base.py | 12 ++++-------- pandas/io/excel/_openpyxl.py | 5 +++++ pandas/tests/io/excel/test_readers.py | 19 +++++++++++++++---- 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 80cbf841e194e..0bfcb95b25ffd 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -29,6 +29,7 @@ Fixed regressions - Fixed regression in the repr of an object-dtype :class:`Index` with bools and missing values (:issue:`32146`) - Fixed regression in :meth:`read_csv` in which the ``encoding`` option was not recognized with certain file-like objects (:issue:`31819`) - Fixed regression in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with (tz-aware) index and ``method=nearest`` (:issue:`26683`) +- Fixed regression in :class:`ExcelFile` where the stream passed into the function was closed by the destructor. (:issue:`31467`) - Fixed regression in :meth:`DataFrame.reindex_like` on a :class:`DataFrame` subclass raised an ``AssertionError`` (:issue:`31925`) - Fixed regression in :meth:`Series.shift` with ``datetime64`` dtype when passing an integer ``fill_value`` (:issue:`32591`) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index f98d9501f1f73..d1139f640cef4 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -366,6 +366,9 @@ def _workbook_class(self): def load_workbook(self, filepath_or_buffer): pass + def close(self): + pass + @property @abc.abstractmethod def sheet_names(self): @@ -895,14 +898,7 @@ def sheet_names(self): def close(self): """close io if necessary""" - if self.engine == "openpyxl": - # https://stackoverflow.com/questions/31416842/ - # openpyxl-does-not-close-excel-workbook-in-read-only-mode - wb = self.book - wb._archive.close() - - if hasattr(self.io, "close"): - self.io.close() + self._reader.close() def __enter__(self): return self diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index a96c0f814e2d8..0696d82e51f34 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -492,6 +492,11 @@ def load_workbook(self, filepath_or_buffer: FilePathOrBuffer): filepath_or_buffer, read_only=True, data_only=True, keep_links=False ) + def close(self): + # https://stackoverflow.com/questions/31416842/ + # openpyxl-does-not-close-excel-workbook-in-read-only-mode + self.book.close() + @property def sheet_names(self) -> List[str]: return self.book.sheetnames diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index cbc043820e35e..8732d4063d74c 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -629,6 +629,17 @@ def test_read_from_py_localpath(self, read_ext): tm.assert_frame_equal(expected, actual) + @td.check_file_leaks + def test_close_from_py_localpath(self, read_ext): + + # GH31467 + str_path = os.path.join("test1" + read_ext) + with open(str_path, "rb") as f: + x = pd.read_excel(f, "Sheet1", index_col=0) + del x + # should not throw an exception because the passed 
file was closed + f.read() + def test_reader_seconds(self, read_ext): if pd.read_excel.keywords["engine"] == "pyxlsb": pytest.xfail("Sheets containing datetimes not supported by pyxlsb") @@ -1020,10 +1031,10 @@ def test_excel_read_buffer(self, engine, read_ext): tm.assert_frame_equal(expected, actual) def test_reader_closes_file(self, engine, read_ext): - f = open("test1" + read_ext, "rb") - with pd.ExcelFile(f) as xlsx: - # parses okay - pd.read_excel(xlsx, "Sheet1", index_col=0, engine=engine) + with open("test1" + read_ext, "rb") as f: + with pd.ExcelFile(f) as xlsx: + # parses okay + pd.read_excel(xlsx, "Sheet1", index_col=0, engine=engine) assert f.closed From 4aba9ed85750d3205e7bdebf0c55c707472ab629 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 12 Mar 2020 08:15:59 -0500 Subject: [PATCH 345/388] DOC: fix announce formtting (#32656) --- doc/sphinxext/announce.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/doc/sphinxext/announce.py b/doc/sphinxext/announce.py index e4859157f73de..0084036f1e75c 100755 --- a/doc/sphinxext/announce.py +++ b/doc/sphinxext/announce.py @@ -122,14 +122,15 @@ def build_string(revision_range, heading="Contributors"): components["uline"] = "=" * len(components["heading"]) components["authors"] = "* " + "\n* ".join(components["authors"]) + # Don't change this to an fstring. It breaks the formatting. tpl = textwrap.dedent( - f"""\ - {components['heading']} - {components['uline']} + """\ + {heading} + {uline} - {components['author_message']} - {components['authors']}""" - ) + {author_message} + {authors}""" + ).format(**components) return tpl From 7147c3880f33d5ceeed45248c308853c32a1c15b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 12 Mar 2020 09:11:01 -0500 Subject: [PATCH 346/388] DOC: Organize regressions (#32658) * Update doc/source/whatsnew/v1.0.2.rst Co-Authored-By: Joris Van den Bossche * Update doc/source/whatsnew/v1.0.2.rst Co-Authored-By: Simon Hawkins Co-authored-by: Joris Van den Bossche Co-authored-by: Simon Hawkins --- doc/source/whatsnew/v1.0.2.rst | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 0bfcb95b25ffd..97260ec5e9f0e 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -1,6 +1,6 @@ .. _whatsnew_102: -What's new in 1.0.2 (March 11, 2020) +What's new in 1.0.2 (March 12, 2020) ------------------------------------ These are the changes in pandas 1.0.2. See :ref:`release` for a full changelog @@ -15,23 +15,34 @@ including other versions of pandas. 
Fixed regressions ~~~~~~~~~~~~~~~~~ -- Fixed regression in :meth:`DataFrame.to_excel` when ``columns`` kwarg is passed (:issue:`31677`) -- Fixed regression in :meth:`Series.align` when ``other`` is a DataFrame and ``method`` is not None (:issue:`31785`) +**Groupby** + - Fixed regression in :meth:`groupby(..).agg() ` which was failing on frames with MultiIndex columns and a custom function (:issue:`31777`) - Fixed regression in ``groupby(..).rolling(..).apply()`` (``RollingGroupby``) where the ``raw`` parameter was ignored (:issue:`31754`) - Fixed regression in :meth:`rolling(..).corr() ` when using a time offset (:issue:`31789`) - Fixed regression in :meth:`groupby(..).nunique() ` which was modifying the original values if ``NaN`` values were present (:issue:`31950`) - Fixed regression in ``DataFrame.groupby`` raising a ``ValueError`` from an internal operation (:issue:`31802`) -- Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`). -- Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) - Fixed regression in :meth:`groupby(..).agg() ` calling a user-provided function an extra time on an empty input (:issue:`31760`) -- Fixed regression in joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` to preserve ``freq`` in simple cases (:issue:`32166`) -- Fixed regression in the repr of an object-dtype :class:`Index` with bools and missing values (:issue:`32146`) + +**I/O** + - Fixed regression in :meth:`read_csv` in which the ``encoding`` option was not recognized with certain file-like objects (:issue:`31819`) -- Fixed regression in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with (tz-aware) index and ``method=nearest`` (:issue:`26683`) +- Fixed regression in :meth:`DataFrame.to_excel` when the ``columns`` keyword argument is passed (:issue:`31677`) - Fixed regression in :class:`ExcelFile` where the stream passed into the function was closed by the destructor. (:issue:`31467`) +- Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`). + +**Reindexing/alignment** + +- Fixed regression in :meth:`Series.align` when ``other`` is a DataFrame and ``method`` is not None (:issue:`31785`) +- Fixed regression in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with (tz-aware) index and ``method=nearest`` (:issue:`26683`) - Fixed regression in :meth:`DataFrame.reindex_like` on a :class:`DataFrame` subclass raised an ``AssertionError`` (:issue:`31925`) +- Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) + +**Other** + +- Fixed regression in joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` to preserve ``freq`` in simple cases (:issue:`32166`) - Fixed regression in :meth:`Series.shift` with ``datetime64`` dtype when passing an integer ``fill_value`` (:issue:`32591`) +- Fixed regression in the repr of an object-dtype :class:`Index` with bools and missing values (:issue:`32146`) .. 
--------------------------------------------------------------------------- From 05925d262e9c8f9fc7c40856a3063afb69244285 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Thu, 12 Mar 2020 19:24:35 +0200 Subject: [PATCH 347/388] CLN: removed unused import (#32518) --- pandas/_libs/tslibs/np_datetime.pyx | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index b59a1101e0bf7..9a8a8fdae6d2f 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -1,12 +1,15 @@ from cpython.object cimport Py_EQ, Py_NE, Py_GE, Py_GT, Py_LT, Py_LE -from cpython.datetime cimport (datetime, date, - PyDateTime_IMPORT, - PyDateTime_GET_YEAR, PyDateTime_GET_MONTH, - PyDateTime_GET_DAY, PyDateTime_DATE_GET_HOUR, - PyDateTime_DATE_GET_MINUTE, - PyDateTime_DATE_GET_SECOND, - PyDateTime_DATE_GET_MICROSECOND) +from cpython.datetime cimport ( + PyDateTime_DATE_GET_HOUR, + PyDateTime_DATE_GET_MICROSECOND, + PyDateTime_DATE_GET_MINUTE, + PyDateTime_DATE_GET_SECOND, + PyDateTime_GET_DAY, + PyDateTime_GET_MONTH, + PyDateTime_GET_YEAR, + PyDateTime_IMPORT, +) PyDateTime_IMPORT from numpy cimport int64_t From 25e2f8687f0a7033ddbbd6aa1b833d0586f99bad Mon Sep 17 00:00:00 2001 From: Will Holmgren Date: Thu, 12 Mar 2020 16:19:22 -0700 Subject: [PATCH 348/388] fix infer_freq raises section (#32642) * fix infer_freq raises section * edits * another period --- pandas/tseries/frequencies.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 4af516af8a28e..2477ff29fbfd5 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -249,9 +249,14 @@ def infer_freq(index, warn: bool = True) -> Optional[str]: Returns ------- str or None - None if no discernible frequency - TypeError if the index is not datetime-like - ValueError if there are less than three values. + None if no discernible frequency. + + Raises + ------ + TypeError + If the index is not datetime-like. + ValueError + If there are fewer than three values. 
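A minimal sketch of the failure modes the docstring now spells out (each call is independent; the inputs are arbitrary examples):

    import pandas as pd

    pd.infer_freq(pd.date_range("2020-01-01", periods=5))          # 'D'
    pd.infer_freq(pd.DatetimeIndex(["2020-01-01", "2020-01-02"]))  # ValueError: fewer than three values
    pd.infer_freq(pd.Index([1.0, 2.0, 3.0]))                       # TypeError: not datetime-like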
""" import pandas as pd From 059f9bff62d3bfe53eeece409b48b879abd1a3b1 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Fri, 13 Mar 2020 14:14:13 +0200 Subject: [PATCH 349/388] CLN: Remove PY2 compat code (#32677) Co-authored-by: MomIsBestFriend <> --- pandas/core/reshape/reshape.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 359e5b956f8a5..145cf43112be3 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -985,12 +985,7 @@ def get_empty_frame(data) -> DataFrame: if prefix is None: dummy_cols = levels else: - - # PY2 embedded unicode, gh-22084 - def _make_col_name(prefix, prefix_sep, level) -> str: - return f"{prefix}{prefix_sep}{level}" - - dummy_cols = [_make_col_name(prefix, prefix_sep, level) for level in levels] + dummy_cols = [f"{prefix}{prefix_sep}{level}" for level in levels] index: Optional[Index] if isinstance(data, Series): From 2b37609477773ff02f6b4115ed809ed8c0e18698 Mon Sep 17 00:00:00 2001 From: epizzigoni <62080595+epizzigoni@users.noreply.github.com> Date: Fri, 13 Mar 2020 23:03:29 +0100 Subject: [PATCH 350/388] DOC: filter method example is more clear (#32690) --- pandas/core/generic.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6f743d7388574..f6853a1bbb748 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4558,6 +4558,10 @@ def filter( >>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])), ... index=['mouse', 'rabbit'], ... columns=['one', 'two', 'three']) + >>> df + one two three + mouse 1 2 3 + rabbit 4 5 6 >>> # select columns by name >>> df.filter(items=['one', 'three']) From c5987d6cb0ab6c6cddd3e54bc0f64742f54a9f35 Mon Sep 17 00:00:00 2001 From: masterpiga Date: Fri, 13 Mar 2020 23:20:27 +0100 Subject: [PATCH 351/388] Fix wrong docstring in qcut (#32679) --- pandas/core/reshape/tile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 86417faf6cd11..b9eb89b4d14c6 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -286,7 +286,7 @@ def qcut( Parameters ---------- x : 1d ndarray or Series - q : int or list-like of int + q : int or list-like of float Number of quantiles. 10 for deciles, 4 for quartiles, etc. Alternately array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles. 
labels : array or False, default None From f676a21e459853c474acdbf0feed34411c237277 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 13 Mar 2020 19:19:50 -0700 Subject: [PATCH 352/388] REF: make DatetimeIndex._simple_new actually simple (#32282) --- pandas/_libs/tslibs/offsets.pyx | 13 +++++- pandas/core/indexes/base.py | 4 +- pandas/core/indexes/datetimelike.py | 22 ++++------ pandas/core/indexes/datetimes.py | 49 +++++++++------------- pandas/core/indexes/timedeltas.py | 9 ++-- pandas/tests/arrays/test_datetimelike.py | 4 +- pandas/tests/indexes/datetimes/test_ops.py | 2 +- pandas/tseries/offsets.py | 15 ------- 8 files changed, 48 insertions(+), 70 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 48a3886c20a3a..da59c635b5a18 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -114,7 +114,18 @@ def apply_index_wraps(func): # Note: normally we would use `@functools.wraps(func)`, but this does # not play nicely with cython class methods def wrapper(self, other): - result = func(self, other) + + is_index = getattr(other, "_typ", "") == "datetimeindex" + + # operate on DatetimeArray + arr = other._data if is_index else other + + result = func(self, arr) + + if is_index: + # Wrap DatetimeArray result back to DatetimeIndex + result = type(other)._simple_new(result, name=other.name) + if self.normalize: result = result.to_period('D').to_timestamp() return result diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index cef8a39d75a4c..23e68802eb126 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3281,13 +3281,11 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): target = _ensure_has_len(target) # target may be an iterator if not isinstance(target, Index) and len(target) == 0: - attrs = self._get_attributes_dict() - attrs.pop("freq", None) # don't preserve freq if isinstance(self, ABCRangeIndex): values = range(0) else: values = self._data[:0] # appropriately-dtyped empty array - target = self._simple_new(values, **attrs) + target = self._simple_new(values, name=self.name) else: target = ensure_index(target) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index bf35c85ac8ed5..054a64bf3f990 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -622,21 +622,11 @@ def _shallow_copy(self, values=None, name: Label = lib.no_default): if values is None: values = self._data - if isinstance(values, type(self)): - values = values._data if isinstance(values, np.ndarray): # TODO: We would rather not get here values = type(self._data)(values, dtype=self.dtype) - attributes = self._get_attributes_dict() - - if self.freq is not None: - if isinstance(values, (DatetimeArray, TimedeltaArray)): - if values.freq is None: - del attributes["freq"] - - attributes["name"] = name - result = self._simple_new(values, **attributes) + result = type(self)._simple_new(values, name=name) result._cache = cache return result @@ -780,7 +770,10 @@ def _fast_union(self, other, sort=None): loc = right.searchsorted(left_start, side="left") right_chunk = right.values[:loc] dates = concat_compat((left.values, right_chunk)) - return self._shallow_copy(dates) + result = self._shallow_copy(dates) + result._set_freq("infer") + # TODO: can we infer that it has self.freq? 
+ return result else: left, right = other, self @@ -792,7 +785,10 @@ def _fast_union(self, other, sort=None): loc = right.searchsorted(left_end, side="right") right_chunk = right.values[loc:] dates = concat_compat((left.values, right_chunk)) - return self._shallow_copy(dates) + result = self._shallow_copy(dates) + result._set_freq("infer") + # TODO: can we infer that it has self.freq? + return result else: return left diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index c8035a9de432b..e791133220dbf 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -7,17 +7,13 @@ from pandas._libs import NaT, Period, Timestamp, index as libindex, lib, tslib as libts from pandas._libs.tslibs import fields, parsing, timezones +from pandas._typing import Label from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import _NS_DTYPE, is_float, is_integer, is_scalar -from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import is_valid_nat_for_dtype -from pandas.core.arrays.datetimes import ( - DatetimeArray, - tz_to_dtype, - validate_tz_from_dtype, -) +from pandas.core.arrays.datetimes import DatetimeArray, tz_to_dtype import pandas.core.common as com from pandas.core.indexes.base import Index, InvalidIndexError, maybe_extract_name from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin @@ -36,7 +32,20 @@ def _new_DatetimeIndex(cls, d): if "data" in d and not isinstance(d["data"], DatetimeIndex): # Avoid need to verify integrity by calling simple_new directly data = d.pop("data") - result = cls._simple_new(data, **d) + if not isinstance(data, DatetimeArray): + # For backward compat with older pickles, we may need to construct + # a DatetimeArray to adapt to the newer _simple_new signature + tz = d.pop("tz") + freq = d.pop("freq") + dta = DatetimeArray._simple_new(data, dtype=tz_to_dtype(tz), freq=freq) + else: + dta = data + for key in ["tz", "freq"]: + # These are already stored in our DatetimeArray; if they are + # also in the pickle and don't match, we have a problem. + if key in d: + assert d.pop(key) == getattr(dta, key) + result = cls._simple_new(dta, **d) else: with warnings.catch_warnings(): # TODO: If we knew what was going in to **d, we might be able to @@ -244,34 +253,16 @@ def __new__( return subarr @classmethod - def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): - """ - We require the we have a dtype compat for the values - if we are passed a non-dtype compat, then coerce using the constructor - """ - if isinstance(values, DatetimeArray): - if tz: - tz = validate_tz_from_dtype(dtype, tz) - dtype = DatetimeTZDtype(tz=tz) - elif dtype is None: - dtype = _NS_DTYPE - - values = DatetimeArray(values, freq=freq, dtype=dtype) - tz = values.tz - freq = values.freq - values = values._data - - dtype = tz_to_dtype(tz) - dtarr = DatetimeArray._simple_new(values, freq=freq, dtype=dtype) - assert isinstance(dtarr, DatetimeArray) + def _simple_new(cls, values: DatetimeArray, name: Label = None): + assert isinstance(values, DatetimeArray), type(values) result = object.__new__(cls) - result._data = dtarr + result._data = values result.name = name result._cache = {} result._no_setting_name = False # For groupby perf. 
See note in indexes/base about _index_data - result._index_data = dtarr._data + result._index_data = values._data result._reset_identity() return result diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index a6d9d4dfc330b..7a7670b0e7965 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -1,6 +1,7 @@ """ implement the TimedeltaIndex """ from pandas._libs import NaT, Timedelta, index as libindex +from pandas._typing import Label from pandas.util._decorators import Appender from pandas.core.dtypes.common import ( @@ -154,7 +155,7 @@ def __new__( if isinstance(data, TimedeltaArray) and freq is None: if copy: data = data.copy() - return cls._simple_new(data, name=name, freq=freq) + return cls._simple_new(data, name=name) if isinstance(data, TimedeltaIndex) and freq is None and name is None: if copy: @@ -170,12 +171,8 @@ def __new__( return cls._simple_new(tdarr, name=name) @classmethod - def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE): - # `dtype` is passed by _shallow_copy in corner cases, should always - # be timedelta64[ns] if present - assert dtype == _TD_DTYPE, dtype + def _simple_new(cls, values: TimedeltaArray, name: Label = None): assert isinstance(values, TimedeltaArray) - assert freq is None or values.freq == freq result = object.__new__(cls) result._data = values diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index b8a70752330c5..e505917da1dc4 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -65,8 +65,8 @@ def test_compare_len1_raises(self): # to the case where one has length-1, which numpy would broadcast data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 - idx = self.array_cls._simple_new(data, freq="D") - arr = self.index_cls(idx) + arr = self.array_cls._simple_new(data, freq="D") + idx = self.index_cls(arr) with pytest.raises(ValueError, match="Lengths must match"): arr == arr[:1] diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 8ed98410ad9a4..a533d06a924e6 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -363,7 +363,7 @@ def test_equals(self): assert not idx.equals(pd.Series(idx2)) # same internal, different tz - idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz="US/Pacific") + idx3 = pd.DatetimeIndex(idx.asi8, tz="US/Pacific") tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) assert not idx.equals(idx3) assert not idx.equals(idx3.copy()) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index b6bbe008812cb..bc20d784c8dee 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -337,9 +337,6 @@ def apply_index(self, i): # integer addition on PeriodIndex is deprecated, # so we directly use _time_shift instead asper = i.to_period("W") - if not isinstance(asper._data, np.ndarray): - # unwrap PeriodIndex --> PeriodArray - asper = asper._data shifted = asper._time_shift(weeks) i = shifted.to_timestamp() + i.to_perioddelta("W") @@ -629,9 +626,6 @@ def apply_index(self, i): # to_period rolls forward to next BDay; track and # reduce n where it does when rolling forward asper = i.to_period("B") - if not isinstance(asper._data, np.ndarray): - # unwrap PeriodIndex --> PeriodArray - asper = asper._data if self.n > 0: shifted = (i.to_perioddelta("B") - time).asi8 != 0 @@ -1384,9 +1378,6 @@ def apply_index(self, i): # integer-array addition on 
PeriodIndex is deprecated, # so we use _addsub_int_array directly asper = i.to_period("M") - if not isinstance(asper._data, np.ndarray): - # unwrap PeriodIndex --> PeriodArray - asper = asper._data shifted = asper._addsub_int_array(roll // 2, operator.add) i = type(dti)(shifted.to_timestamp()) @@ -1582,9 +1573,6 @@ def apply_index(self, i): # integer addition on PeriodIndex is deprecated, # so we use _time_shift directly asper = i.to_period("W") - if not isinstance(asper._data, np.ndarray): - # unwrap PeriodIndex --> PeriodArray - asper = asper._data shifted = asper._time_shift(self.n) return shifted.to_timestamp() + i.to_perioddelta("W") @@ -1608,9 +1596,6 @@ def _end_apply_index(self, dtindex): base, mult = libfrequencies.get_freq_code(self.freqstr) base_period = dtindex.to_period(base) - if not isinstance(base_period._data, np.ndarray): - # unwrap PeriodIndex --> PeriodArray - base_period = base_period._data if self.n > 0: # when adding, dates on end roll to next From 8d654cae0e3a1a1acf4a139dec5c49e43f3abd18 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 13 Mar 2020 19:55:30 -0700 Subject: [PATCH 353/388] CLN: remove unused int_block (#32695) --- pandas/tests/internals/test_internals.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index a569726e9a22a..0bbea671bae19 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -207,7 +207,6 @@ def setup_method(self, method): self.cblock = create_block("complex", [7]) self.oblock = create_block("object", [1, 3]) self.bool_block = create_block("bool", [5]) - self.int_block = create_block("int", [6]) def test_constructor(self): int32block = create_block("i4", [0]) From 2bc24d5ad95844cec7858b74ad8cc30f2cacbdec Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 13 Mar 2020 19:57:59 -0700 Subject: [PATCH 354/388] CLN: avoid _ndarray_values in reshape.merge (#32693) --- pandas/core/reshape/merge.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index aeec2a43f39bf..85386923e9912 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1657,10 +1657,13 @@ def _get_merge_keys(self): def _get_join_indexers(self): """ return the join indexers """ - def flip(xs): + def flip(xs) -> np.ndarray: """ unlike np.transpose, this returns an array of tuples """ xs = [ - x if not is_extension_array_dtype(x) else x._ndarray_values for x in xs + x + if not is_extension_array_dtype(x) + else extract_array(x)._values_for_argsort() + for x in xs ] labels = list(string.ascii_lowercase[: len(xs)]) dtypes = [x.dtype for x in xs] From d8f60c3763563a6d920459abd6bd058b53368b8c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 13 Mar 2020 20:03:16 -0700 Subject: [PATCH 355/388] REF: put all post-processing at end of DataFrame._reduce (#32671) --- pandas/core/frame.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5e8db3acc7ff3..e153fdaac16e2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7808,6 +7808,8 @@ def _reduce( self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds ): + assert filter_type is None or filter_type == "bool", filter_type + dtype_is_dt = self.dtypes.apply( lambda x: is_datetime64_any_dtype(x) or is_period_dtype(x) ) @@ -7835,7 +7837,7 @@ def f(x): 
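# f applies the reduction op over the requested axis; at the user level
# this backs calls like DataFrame.sum.  A minimal sketch of the behaviour
# being refactored here (existing semantics, not new functionality):
#
#     >>> import pandas as pd
#     >>> pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]}).sum()
#     a    3.0
#     b    7.0
#     dtype: float64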
return op(x, axis=axis, skipna=skipna, **kwds) def _get_data(axis_matters): - if filter_type is None or filter_type == "numeric": + if filter_type is None: data = self._get_numeric_data() elif filter_type == "bool": if axis_matters: @@ -7882,15 +7884,11 @@ def blk_func(values): return out if numeric_only is None: - values = self.values + data = self + values = data.values try: result = f(values) - if filter_type == "bool" and is_object_dtype(values) and axis is None: - # work around https://github.com/numpy/numpy/issues/10489 - # TODO: combine with hasattr(result, 'dtype') further down - # hard since we don't have `values` down there. - result = np.bool_(result) except TypeError: # e.g. in nanops trying to convert strs to float @@ -7916,30 +7914,36 @@ def blk_func(values): # TODO: why doesnt axis matter here? data = _get_data(axis_matters=False) - with np.errstate(all="ignore"): - result = f(data.values) labels = data._get_agg_axis(axis) + + values = data.values + with np.errstate(all="ignore"): + result = f(values) else: if numeric_only: data = _get_data(axis_matters=True) + labels = data._get_agg_axis(axis) values = data.values - labels = data._get_agg_axis(axis) else: - values = self.values + data = self + values = data.values result = f(values) - if hasattr(result, "dtype") and is_object_dtype(result.dtype): + if filter_type == "bool" and is_object_dtype(values) and axis is None: + # work around https://github.com/numpy/numpy/issues/10489 + # TODO: can we de-duplicate parts of this with the next blocK? + result = np.bool_(result) + elif hasattr(result, "dtype") and is_object_dtype(result.dtype): try: - if filter_type is None or filter_type == "numeric": + if filter_type is None: result = result.astype(np.float64) elif filter_type == "bool" and notna(result).all(): result = result.astype(np.bool_) except (ValueError, TypeError): - # try to coerce to the original dtypes item by item if we can if axis == 0: - result = coerce_to_dtypes(result, self.dtypes) + result = coerce_to_dtypes(result, data.dtypes) if constructor is not None: result = self._constructor_sliced(result, index=labels) From ea9942b5c198a9c4bb763447df4e34857214cf58 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 13 Mar 2020 20:06:14 -0700 Subject: [PATCH 356/388] CLN: remove unused kwargs from BlockManager.downcast (#32691) --- pandas/core/internals/blocks.py | 18 ++++++++++-------- pandas/core/internals/managers.py | 26 ++++++++++++++++++++------ pandas/tests/generic/test_generic.py | 17 ++--------------- 3 files changed, 32 insertions(+), 29 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6d4ee6222933c..ea1f394aacd5f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -403,7 +403,9 @@ def _split_op_result(self, result) -> List["Block"]: return [result] - def fillna(self, value, limit=None, inplace: bool = False, downcast=None): + def fillna( + self, value, limit=None, inplace: bool = False, downcast=None + ) -> List["Block"]: """ fillna on the block with the value. 
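All paths now return a list of blocks.  A rough user-level illustration
of what this method backs (a sketch, not part of the contract):

    >>> import pandas as pd
    >>> pd.Series([1.0, None, 3.0]).fillna(0.0).tolist()
    [1.0, 0.0, 3.0]
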
If we fail, then convert to ObjectBlock and try again @@ -417,9 +419,9 @@ def fillna(self, value, limit=None, inplace: bool = False, downcast=None): if not self._can_hold_na: if inplace: - return self + return [self] else: - return self.copy() + return [self.copy()] if self._can_hold_element(value): # equivalent: _try_coerce_args(value) would not raise @@ -428,7 +430,7 @@ def fillna(self, value, limit=None, inplace: bool = False, downcast=None): # we can't process the value, but nothing to do if not mask.any(): - return self if inplace else self.copy() + return [self] if inplace else [self.copy()] # operate column-by-column def f(mask, val, idx): @@ -442,7 +444,7 @@ def f(mask, val, idx): return self.split_and_operate(None, f, inplace) - def split_and_operate(self, mask, f, inplace: bool): + def split_and_operate(self, mask, f, inplace: bool) -> List["Block"]: """ split the block per-column, and apply the callable f per-column, return a new block for each. Handle @@ -1191,7 +1193,7 @@ def _interpolate_with_fill( fill_value=None, coerce=False, downcast=None, - ): + ) -> List["Block"]: """ fillna but using the interpolate machinery """ inplace = validate_bool_kwarg(inplace, "inplace") @@ -1233,7 +1235,7 @@ def _interpolate( inplace=False, downcast=None, **kwargs, - ): + ) -> List["Block"]: """ interpolate using scipy wrappers """ inplace = validate_bool_kwarg(inplace, "inplace") data = self.values if inplace else self.values.copy() @@ -1241,7 +1243,7 @@ def _interpolate( # only deal with floats if not self.is_float: if not self.is_integer: - return self + return [self] data = data.astype(np.float64) if fill_value is None: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 0bba8de6682ea..2a239746daac7 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -565,8 +565,8 @@ def isna(self, func) -> "BlockManager": def where(self, **kwargs) -> "BlockManager": return self.apply("where", **kwargs) - def setitem(self, **kwargs) -> "BlockManager": - return self.apply("setitem", **kwargs) + def setitem(self, indexer, value) -> "BlockManager": + return self.apply("setitem", indexer=indexer, value=value) def putmask(self, **kwargs): return self.apply("putmask", **kwargs) @@ -583,16 +583,30 @@ def shift(self, **kwargs) -> "BlockManager": def fillna(self, **kwargs) -> "BlockManager": return self.apply("fillna", **kwargs) - def downcast(self, **kwargs) -> "BlockManager": - return self.apply("downcast", **kwargs) + def downcast(self) -> "BlockManager": + return self.apply("downcast") def astype( self, dtype, copy: bool = False, errors: str = "raise" ) -> "BlockManager": return self.apply("astype", dtype=dtype, copy=copy, errors=errors) - def convert(self, **kwargs) -> "BlockManager": - return self.apply("convert", **kwargs) + def convert( + self, + copy: bool = True, + datetime: bool = True, + numeric: bool = True, + timedelta: bool = True, + coerce: bool = False, + ) -> "BlockManager": + return self.apply( + "convert", + copy=copy, + datetime=datetime, + numeric=numeric, + timedelta=timedelta, + coerce=coerce, + ) def replace(self, value, **kwargs) -> "BlockManager": assert np.ndim(value) == 0, value diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 1b6cb8447c76d..9c759ff1414fe 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -159,27 +159,14 @@ def test_downcast(self): o = self._construct(shape=4, value=9, dtype=np.int64) result = o.copy() - 
result._data = o._data.downcast(dtypes="infer") + result._data = o._data.downcast() self._compare(result, o) - o = self._construct(shape=4, value=9.0) - expected = o.astype(np.int64) - result = o.copy() - result._data = o._data.downcast(dtypes="infer") - self._compare(result, expected) - o = self._construct(shape=4, value=9.5) result = o.copy() - result._data = o._data.downcast(dtypes="infer") + result._data = o._data.downcast() self._compare(result, o) - # are close - o = self._construct(shape=4, value=9.000000000005) - result = o.copy() - result._data = o._data.downcast(dtypes="infer") - expected = o.astype(np.int64) - self._compare(result, expected) - def test_constructor_compound_dtypes(self): # see gh-5191 # Compound dtypes should raise NotImplementedError. From fa26bce51b78f0f3137ac977b365593dd706d141 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 13 Mar 2020 20:12:12 -0700 Subject: [PATCH 357/388] values->_values (#32662) --- pandas/core/indexes/category.py | 14 +++++++------- pandas/core/indexes/interval.py | 4 ++-- pandas/core/indexes/numeric.py | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 6388f2007cb12..52423c4008399 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -396,7 +396,7 @@ def _engine(self): def unique(self, level=None): if level is not None: self._validate_index_level(level) - result = self.values.unique() + result = self._values.unique() # Use _simple_new instead of _shallow_copy to ensure we keep dtype # of result, not self. return type(self)._simple_new(result, name=self.name) @@ -423,7 +423,7 @@ def where(self, cond, other=None): # 3. Rebuild CategoricalIndex. if other is None: other = self._na_value - values = np.where(cond, self.values, other) + values = np.where(cond, self._values, other) cat = Categorical(values, dtype=self.dtype) return type(self)._simple_new(cat, name=self.name) @@ -532,13 +532,13 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): "method='nearest' not implemented yet for CategoricalIndex" ) - if isinstance(target, CategoricalIndex) and self.values.is_dtype_equal(target): - if self.values.equals(target.values): + if isinstance(target, CategoricalIndex) and self._values.is_dtype_equal(target): + if self._values.equals(target._values): # we have the same codes codes = target.codes else: codes = _recode_for_categories( - target.codes, target.categories, self.values.categories + target.codes, target.categories, self._values.categories ) else: if isinstance(target, CategoricalIndex): @@ -560,7 +560,7 @@ def get_indexer_non_unique(self, target): target = target.codes indexer, missing = self._engine.get_indexer_non_unique(target) return ensure_platform_int(indexer), missing - target = target.values + target = target._values codes = self.categories.get_indexer(target) indexer, missing = self._engine.get_indexer_non_unique(codes) @@ -679,7 +679,7 @@ def map(self, mapper): >>> idx.map({'a': 'first', 'b': 'second'}) Index(['first', 'second', nan], dtype='object') """ - return self._shallow_copy_with_infer(self.values.map(mapper)) + return self._shallow_copy_with_infer(self._values.map(mapper)) def delete(self, loc): """ diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 80d58de6f1437..3d31e7f8054ec 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -409,7 +409,7 @@ def __reduce__(self): @Appender(Index.astype.__doc__) 
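# (This patch consistently prefers ``self._values`` over ``self.values``
# in index internals; for an IntervalIndex, ``_values`` exposes the
# underlying IntervalArray.  An illustrative check, assuming pandas at
# this commit:
#
#     >>> import pandas as pd
#     >>> type(pd.interval_range(0, 3)._values).__name__
#     'IntervalArray'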
def astype(self, dtype, copy=True): with rewrite_exception("IntervalArray", type(self).__name__): - new_values = self.values.astype(dtype, copy=copy) + new_values = self._values.astype(dtype, copy=copy) if is_interval_dtype(new_values): return self._shallow_copy(new_values) return Index.astype(self, dtype, copy=copy) @@ -887,7 +887,7 @@ def _convert_slice_indexer(self, key: slice, kind: str): def where(self, cond, other=None): if other is None: other = self._na_value - values = np.where(cond, self.values, other) + values = np.where(cond, self._values, other) result = IntervalArray(values) return self._shallow_copy(result) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 2d1d69772c100..3a6f3630c19e7 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -248,7 +248,7 @@ def inferred_type(self) -> str: @property def asi8(self) -> np.ndarray: # do not cache or you'll create a memory leak - return self.values.view(self._default_dtype) + return self._values.view(self._default_dtype) class Int64Index(IntegerIndex): @@ -368,7 +368,7 @@ def astype(self, dtype, copy=True): elif is_integer_dtype(dtype) and not is_extension_array_dtype(dtype): # TODO(jreback); this can change once we have an EA Index type # GH 13149 - arr = astype_nansafe(self.values, dtype=dtype) + arr = astype_nansafe(self._values, dtype=dtype) return Int64Index(arr) return super().astype(dtype, copy=copy) @@ -395,7 +395,7 @@ def _format_native_types( from pandas.io.formats.format import FloatArrayFormatter formatter = FloatArrayFormatter( - self.values, + self._values, na_rep=na_rep, float_format=float_format, decimal=decimal, From 6a3f0307ecceb1fbc6dac76d83f186253cb1b745 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 13 Mar 2020 20:17:17 -0700 Subject: [PATCH 358/388] ENH: implement EA.size (#32644) --- pandas/core/arrays/base.py | 7 +++++++ pandas/tests/extension/base/interface.py | 3 +++ 2 files changed, 10 insertions(+) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index b5da6d4c11616..6aa303dd04703 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -407,6 +407,13 @@ def shape(self) -> Tuple[int, ...]: """ return (len(self),) + @property + def size(self) -> int: + """ + The number of elements in the array. 
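
        For example (a doctest-style sketch; ``pd.array`` returns a
        pandas ExtensionArray):

        >>> import pandas as pd
        >>> pd.array([1, 2, 3]).size
        3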
+ """ + return np.prod(self.shape) + @property def ndim(self) -> int: """ diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index cdea96334be2a..95fb3d7439b56 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -19,6 +19,9 @@ class BaseInterfaceTests(BaseExtensionTests): def test_len(self, data): assert len(data) == 100 + def test_size(self, data): + assert data.size == 100 + def test_ndim(self, data): assert data.ndim == 1 From 44b3855b837cb678db10147f032a6ef0d727f4f0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 13 Mar 2020 20:30:12 -0700 Subject: [PATCH 359/388] CLN: remove Block.array_dtype, SingleBlockManager.array_dtype (#32612) --- pandas/core/internals/blocks.py | 17 ----------------- pandas/core/internals/managers.py | 10 ++++------ 2 files changed, 4 insertions(+), 23 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ea1f394aacd5f..f07fa99fe57d6 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -11,7 +11,6 @@ import pandas._libs.internals as libinternals from pandas._libs.tslibs import Timedelta, conversion from pandas._libs.tslibs.timezones import tz_compare -from pandas._typing import DtypeObj from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -256,14 +255,6 @@ def mgr_locs(self, new_mgr_locs): self._mgr_locs = new_mgr_locs - @property - def array_dtype(self) -> DtypeObj: - """ - the dtype to return if I want to construct this block as an - array - """ - return self.dtype - def make_block(self, values, placement=None) -> "Block": """ Create a new block, with type inference propagate any values that are @@ -2921,14 +2912,6 @@ def __init__(self, values, placement, ndim=None): def _holder(self): return Categorical - @property - def array_dtype(self): - """ - the dtype to return if I want to construct this block as an - array - """ - return np.object_ - def to_dense(self): # Categorical.get_values returns a DatetimeIndex for datetime # categories, so we can't simply use `np.asarray(self.values)` like diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 2a239746daac7..b434f8dbde4eb 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -193,8 +193,10 @@ def make_empty(self: T, axes=None) -> T: # preserve dtype if possible if self.ndim == 1: assert isinstance(self, SingleBlockManager) # for mypy - arr = np.array([], dtype=self.array_dtype) - blocks = [make_block(arr, placement=slice(0, 0), ndim=1)] + blk = self.blocks[0] + arr = blk.values[:0] + nb = blk.make_block_same_class(arr, placement=slice(0, 0), ndim=1) + blocks = [nb] else: blocks = [] return type(self).from_blocks(blocks, axes) @@ -1585,10 +1587,6 @@ def index(self) -> Index: def dtype(self) -> DtypeObj: return self._block.dtype - @property - def array_dtype(self) -> DtypeObj: - return self._block.array_dtype - def get_dtype_counts(self): return {self.dtype.name: 1} From 3c16ceaac8b6c228a06b92001a9f674376900cf0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 13 Mar 2020 20:32:23 -0700 Subject: [PATCH 360/388] REF: implement nanops.na_accum_func (#32597) --- pandas/core/generic.py | 72 +++++----------------------------------- pandas/core/nanops.py | 74 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 81 insertions(+), 65 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 
f6853a1bbb748..e9811e0f60c7d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -30,7 +30,7 @@ from pandas._config import config -from pandas._libs import Timestamp, iNaT, lib +from pandas._libs import Timestamp, lib from pandas._typing import ( Axis, FilePathOrBuffer, @@ -10106,8 +10106,6 @@ def mad(self, axis=None, skipna=None, level=None): desc="minimum", accum_func=np.minimum.accumulate, accum_func_name="min", - mask_a=np.inf, - mask_b=np.nan, examples=_cummin_examples, ) cls.cumsum = _make_cum_function( @@ -10119,8 +10117,6 @@ def mad(self, axis=None, skipna=None, level=None): desc="sum", accum_func=np.cumsum, accum_func_name="sum", - mask_a=0.0, - mask_b=np.nan, examples=_cumsum_examples, ) cls.cumprod = _make_cum_function( @@ -10132,8 +10128,6 @@ def mad(self, axis=None, skipna=None, level=None): desc="product", accum_func=np.cumprod, accum_func_name="prod", - mask_a=1.0, - mask_b=np.nan, examples=_cumprod_examples, ) cls.cummax = _make_cum_function( @@ -10145,8 +10139,6 @@ def mad(self, axis=None, skipna=None, level=None): desc="maximum", accum_func=np.maximum.accumulate, accum_func_name="max", - mask_a=-np.inf, - mask_b=np.nan, examples=_cummax_examples, ) @@ -11186,8 +11178,6 @@ def _make_cum_function( desc: str, accum_func: Callable, accum_func_name: str, - mask_a: float, - mask_b: float, examples: str, ) -> Callable: @Substitution( @@ -11209,61 +11199,15 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs): if axis == 1: return cum_func(self.T, axis=0, skipna=skipna, *args, **kwargs).T - def na_accum_func(blk_values): - # We will be applying this function to block values - if blk_values.dtype.kind in ["m", "M"]: - # GH#30460, GH#29058 - # numpy 1.18 started sorting NaTs at the end instead of beginning, - # so we need to work around to maintain backwards-consistency. - orig_dtype = blk_values.dtype - - # We need to define mask before masking NaTs - mask = isna(blk_values) - - if accum_func == np.minimum.accumulate: - # Note: the accum_func comparison fails as an "is" comparison - y = blk_values.view("i8") - y[mask] = np.iinfo(np.int64).max - changed = True - else: - y = blk_values - changed = False - - result = accum_func(y.view("i8"), axis) - if skipna: - np.putmask(result, mask, iNaT) - elif accum_func == np.minimum.accumulate: - # Restore NaTs that we masked previously - nz = (~np.asarray(mask)).nonzero()[0] - if len(nz): - # everything up to the first non-na entry stays NaT - result[: nz[0]] = iNaT - - if changed: - # restore NaT elements - y[mask] = iNaT # TODO: could try/finally for this? 
- - if isinstance(blk_values, np.ndarray): - result = result.view(orig_dtype) - else: - # DatetimeArray - result = type(blk_values)._from_sequence(result, dtype=orig_dtype) - - elif skipna and not issubclass( - blk_values.dtype.type, (np.integer, np.bool_) - ): - vals = blk_values.copy().T - mask = isna(vals) - np.putmask(vals, mask, mask_a) - result = accum_func(vals, axis) - np.putmask(result, mask, mask_b) - else: - result = accum_func(blk_values.T, axis) + def block_accum_func(blk_values): + values = blk_values.T if hasattr(blk_values, "T") else blk_values - # transpose back for ndarray, not for EA - return result.T if hasattr(result, "T") else result + result = nanops.na_accum_func(values, accum_func, skipna=skipna) + + result = result.T if hasattr(result, "T") else result + return result - result = self._data.apply(na_accum_func) + result = self._data.apply(block_accum_func) d = self._construct_axes_dict() d["copy"] = False diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 269843abb15ee..a5e70bd279d21 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -8,7 +8,7 @@ from pandas._config import get_option from pandas._libs import NaT, Period, Timedelta, Timestamp, iNaT, lib -from pandas._typing import Dtype, Scalar +from pandas._typing import ArrayLike, Dtype, Scalar from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask @@ -1500,3 +1500,75 @@ def nanpercentile( return result else: return np.percentile(values, q, axis=axis, interpolation=interpolation) + + +def na_accum_func(values: ArrayLike, accum_func, skipna: bool) -> ArrayLike: + """ + Cumulative function with skipna support. + + Parameters + ---------- + values : np.ndarray or ExtensionArray + accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minumum.accumulate} + skipna : bool + + Returns + ------- + np.ndarray or ExtensionArray + """ + mask_a, mask_b = { + np.cumprod: (1.0, np.nan), + np.maximum.accumulate: (-np.inf, np.nan), + np.cumsum: (0.0, np.nan), + np.minimum.accumulate: (np.inf, np.nan), + }[accum_func] + + # We will be applying this function to block values + if values.dtype.kind in ["m", "M"]: + # GH#30460, GH#29058 + # numpy 1.18 started sorting NaTs at the end instead of beginning, + # so we need to work around to maintain backwards-consistency. + orig_dtype = values.dtype + + # We need to define mask before masking NaTs + mask = isna(values) + + if accum_func == np.minimum.accumulate: + # Note: the accum_func comparison fails as an "is" comparison + y = values.view("i8") + y[mask] = np.iinfo(np.int64).max + changed = True + else: + y = values + changed = False + + result = accum_func(y.view("i8"), axis=0) + if skipna: + result[mask] = iNaT + elif accum_func == np.minimum.accumulate: + # Restore NaTs that we masked previously + nz = (~np.asarray(mask)).nonzero()[0] + if len(nz): + # everything up to the first non-na entry stays NaT + result[: nz[0]] = iNaT + + if changed: + # restore NaT elements + y[mask] = iNaT # TODO: could try/finally for this? 
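# (The user-visible semantics this helper centralizes, e.g. for cummax:
#
#     >>> import numpy as np, pandas as pd
#     >>> s = pd.Series([1.0, np.nan, 3.0])
#     >>> s.cummax().tolist()
#     [1.0, nan, 3.0]
#     >>> s.cummax(skipna=False).tolist()
#     [1.0, nan, nan]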
+ + if isinstance(values, np.ndarray): + result = result.view(orig_dtype) + else: + # DatetimeArray + result = type(values)._from_sequence(result, dtype=orig_dtype) + + elif skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)): + vals = values.copy() + mask = isna(vals) + vals[mask] = mask_a + result = accum_func(vals, axis=0) + result[mask] = mask_b + else: + result = accum_func(values, axis=0) + + return result From 011f4110d114fb07d05a01ebb48259f6dfd09e33 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 13 Mar 2020 20:37:21 -0700 Subject: [PATCH 361/388] ENH: IntegerArray.astype(dt64) (#32538) --- pandas/_libs/tslib.pyx | 14 ++++---------- pandas/core/arrays/integer.py | 3 +++ pandas/core/tools/datetimes.py | 12 +++++------- pandas/tests/arrays/test_array.py | 2 ++ pandas/tests/arrays/test_integer.py | 9 +++++++++ pandas/tests/frame/test_dtypes.py | 2 +- 6 files changed, 24 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index b78b623bfa187..94e757624c136 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -14,7 +14,7 @@ PyDateTime_IMPORT cimport numpy as cnp -from numpy cimport float64_t, int64_t, ndarray +from numpy cimport float64_t, int64_t, ndarray, uint8_t import numpy as np cnp.import_array() @@ -351,7 +351,6 @@ def format_array_from_datetime( def array_with_unit_to_datetime( ndarray values, - ndarray mask, object unit, str errors='coerce' ): @@ -373,8 +372,6 @@ def array_with_unit_to_datetime( ---------- values : ndarray of object Date-like objects to convert. - mask : boolean ndarray - Not-a-time mask for non-nullable integer types conversion, can be None. unit : object Time unit to use during conversion. errors : str, default 'raise' @@ -395,6 +392,7 @@ def array_with_unit_to_datetime( bint need_to_iterate = True ndarray[int64_t] iresult ndarray[object] oresult + ndarray mask object tz = None assert is_ignore or is_coerce or is_raise @@ -404,9 +402,6 @@ def array_with_unit_to_datetime( result = values.astype('M8[ns]') else: result, tz = array_to_datetime(values.astype(object), errors=errors) - if mask is not None: - iresult = result.view('i8') - iresult[mask] = NPY_NAT return result, tz m = cast_from_unit(None, unit) @@ -419,9 +414,8 @@ def array_with_unit_to_datetime( if values.dtype.kind == "i": # Note: this condition makes the casting="same_kind" redundant iresult = values.astype('i8', casting='same_kind', copy=False) - # If no mask, fill mask by comparing to NPY_NAT constant - if mask is None: - mask = iresult == NPY_NAT + # fill by comparing to NPY_NAT constant + mask = iresult == NPY_NAT iresult[mask] = 0 fvalues = iresult.astype('f8') * m need_to_iterate = False diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index e2b66b1a006e4..fb33840ad757c 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -13,6 +13,7 @@ from pandas.core.dtypes.cast import astype_nansafe from pandas.core.dtypes.common import ( is_bool_dtype, + is_datetime64_dtype, is_float, is_float_dtype, is_integer, @@ -469,6 +470,8 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: if is_float_dtype(dtype): # In astype, we consider dtype=float to also mean na_value=np.nan kwargs = dict(na_value=np.nan) + elif is_datetime64_dtype(dtype): + kwargs = dict(na_value=np.datetime64("NaT")) else: kwargs = {} diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 5580146b37d25..c32b4d81c0988 100644 --- a/pandas/core/tools/datetimes.py +++ 
b/pandas/core/tools/datetimes.py @@ -323,15 +323,13 @@ def _convert_listlike_datetimes( # GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime # because it expects an ndarray argument if isinstance(arg, IntegerArray): - # Explicitly pass NaT mask to array_with_unit_to_datetime - mask = arg.isna() - arg = arg._ndarray_values + result = arg.astype(f"datetime64[{unit}]") + tz_parsed = None else: - mask = None - result, tz_parsed = tslib.array_with_unit_to_datetime( - arg, mask, unit, errors=errors - ) + result, tz_parsed = tslib.array_with_unit_to_datetime( + arg, unit, errors=errors + ) if errors == "ignore": from pandas import Index diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index f42b16cf18f20..ad6e6e4a98057 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -222,6 +222,8 @@ def test_array_copy(): # integer ([1, 2], IntegerArray._from_sequence([1, 2])), ([1, None], IntegerArray._from_sequence([1, None])), + ([1, pd.NA], IntegerArray._from_sequence([1, pd.NA])), + ([1, np.nan], IntegerArray._from_sequence([1, np.nan])), # string (["a", "b"], StringArray._from_sequence(["a", "b"])), (["a", None], StringArray._from_sequence(["a", None])), diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 0a5a2362bd290..70a029bd74bda 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -633,6 +633,15 @@ def test_astype_specific_casting(self, dtype): expected = pd.Series([1, 2, 3, None], dtype=dtype) tm.assert_series_equal(result, expected) + def test_astype_dt64(self): + # GH#32435 + arr = pd.array([1, 2, 3, pd.NA]) * 10 ** 9 + + result = arr.astype("datetime64[ns]") + + expected = np.array([1, 2, 3, "NaT"], dtype="M8[s]").astype("M8[ns]") + tm.assert_numpy_array_equal(result, expected) + def test_construct_cast_invalid(self, dtype): msg = "cannot safely" diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 713d8f3ceeedb..d1a7917bd127b 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -505,7 +505,7 @@ def test_df_where_change_dtype(self): @pytest.mark.parametrize("dtype", ["M8", "m8"]) @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) - def test_astype_from_datetimelike_to_objectt(self, dtype, unit): + def test_astype_from_datetimelike_to_object(self, dtype, unit): # tests astype to object dtype # gh-19223 / gh-12425 dtype = f"{dtype}[{unit}]" From 3b66021ecb74da2c35e16958121bd224d5de5264 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 13 Mar 2020 20:41:06 -0700 Subject: [PATCH 362/388] TST: make tests stricter (#32527) --- pandas/tests/arithmetic/test_numeric.py | 18 +++++----- .../dtypes/cast/test_construct_from_scalar.py | 4 +-- pandas/tests/frame/test_constructors.py | 5 +-- pandas/tests/io/excel/test_writers.py | 2 +- pandas/tests/io/json/test_pandas.py | 1 - pandas/tests/io/json/test_ujson.py | 35 ++++++++++++++++--- pandas/tests/io/pytables/test_store.py | 4 +-- pandas/tests/io/test_clipboard.py | 8 +++-- pandas/tests/reshape/merge/test_join.py | 2 +- .../tests/reshape/test_union_categoricals.py | 2 +- pandas/tests/series/methods/test_argsort.py | 4 +-- pandas/tests/series/methods/test_replace.py | 2 +- pandas/tests/series/test_duplicates.py | 4 +-- 13 files changed, 57 insertions(+), 34 deletions(-) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index d4baf2f374cdf..202e30287881f 
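# (Behaviour added by the patch above, taken from its own test: a nullable
# integer array now round-trips to datetime64, with pd.NA becoming NaT.
#
#     >>> import numpy as np, pandas as pd
#     >>> arr = pd.array([1, 2, 3, pd.NA]) * 10 ** 9
#     >>> arr.astype("datetime64[ns]")[-1]
#     numpy.datetime64('NaT')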
100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -913,13 +913,13 @@ def test_frame_operators(self, float_frame): # TODO: taken from tests.series.test_operators; needs cleanup def test_series_operators(self): - def _check_op(series, other, op, pos_only=False, check_dtype=True): + def _check_op(series, other, op, pos_only=False): left = np.abs(series) if pos_only else series right = np.abs(other) if pos_only else other cython_or_numpy = op(left, right) python = left.combine(right, op) - tm.assert_series_equal(cython_or_numpy, python, check_dtype=check_dtype) + tm.assert_series_equal(cython_or_numpy, python) def check(series, other): simple_ops = ["add", "sub", "mul", "truediv", "floordiv", "mod"] @@ -942,15 +942,15 @@ def check(series, other): check(tser, tser[::2]) check(tser, 5) - def check_comparators(series, other, check_dtype=True): - _check_op(series, other, operator.gt, check_dtype=check_dtype) - _check_op(series, other, operator.ge, check_dtype=check_dtype) - _check_op(series, other, operator.eq, check_dtype=check_dtype) - _check_op(series, other, operator.lt, check_dtype=check_dtype) - _check_op(series, other, operator.le, check_dtype=check_dtype) + def check_comparators(series, other): + _check_op(series, other, operator.gt) + _check_op(series, other, operator.ge) + _check_op(series, other, operator.eq) + _check_op(series, other, operator.lt) + _check_op(series, other, operator.le) check_comparators(tser, 5) - check_comparators(tser, tser + 1, check_dtype=False) + check_comparators(tser, tser + 1) # TODO: taken from tests.series.test_operators; needs cleanup def test_divmod(self): diff --git a/pandas/tests/dtypes/cast/test_construct_from_scalar.py b/pandas/tests/dtypes/cast/test_construct_from_scalar.py index cc823a3d6e02c..ed272cef3e7ba 100644 --- a/pandas/tests/dtypes/cast/test_construct_from_scalar.py +++ b/pandas/tests/dtypes/cast/test_construct_from_scalar.py @@ -15,6 +15,4 @@ def test_cast_1d_array_like_from_scalar_categorical(): expected = Categorical(["a", "a"], categories=cats) result = construct_1d_arraylike_from_scalar("a", len(expected), cat_type) - tm.assert_categorical_equal( - result, expected, check_category_order=True, check_dtype=True - ) + tm.assert_categorical_equal(result, expected) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 071d2409f1be2..172800e74d181 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -916,10 +916,7 @@ def test_constructor_mrecarray(self): # from GH3479 assert_fr_equal = functools.partial( - tm.assert_frame_equal, - check_index_type=True, - check_column_type=True, - check_frame_type=True, + tm.assert_frame_equal, check_index_type=True, check_column_type=True, ) arrays = [ ("float", np.array([1.5, 2.0])), diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 59899673cfc31..7ef4c454c5a5d 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -452,7 +452,7 @@ def test_float_types(self, np_type, path): reader = ExcelFile(path) recons = pd.read_excel(reader, "test1", index_col=0).astype(np_type) - tm.assert_frame_equal(df, recons, check_dtype=False) + tm.assert_frame_equal(df, recons) @pytest.mark.parametrize("np_type", [np.bool8, np.bool_]) def test_bool_types(self, np_type, path): diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index f2d35bfb3b5ae..276dfd666c5d0 
100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -572,7 +572,6 @@ def test_blocks_compat_GH9037(self): df_roundtrip, check_index_type=True, check_column_type=True, - check_frame_type=True, by_blocks=True, check_exact=True, ) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index e966db7a1cc71..e86667626deda 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -48,6 +48,18 @@ def numpy(request): return request.param +def get_int32_compat_dtype(numpy, orient): + # See GH#32527 + dtype = np.int64 + if not ((numpy is None or orient == "index") or (numpy is True and orient is None)): + if compat.is_platform_windows(): + dtype = np.int32 + else: + dtype = np.intp + + return dtype + + class TestUltraJSONTests: @pytest.mark.skipif( compat.is_platform_32bit(), reason="not compliant on 32-bit, xref #15865" @@ -833,13 +845,20 @@ def test_dataframe(self, orient, numpy): if orient == "records" and numpy: pytest.skip("Not idiomatic pandas") + dtype = get_int32_compat_dtype(numpy, orient) + df = DataFrame( - [[1, 2, 3], [4, 5, 6]], index=["a", "b"], columns=["x", "y", "z"] + [[1, 2, 3], [4, 5, 6]], + index=["a", "b"], + columns=["x", "y", "z"], + dtype=dtype, ) encode_kwargs = {} if orient is None else dict(orient=orient) decode_kwargs = {} if numpy is None else dict(numpy=numpy) + assert (df.dtypes == dtype).all() output = ujson.decode(ujson.encode(df, **encode_kwargs), **decode_kwargs) + assert (df.dtypes == dtype).all() # Ensure proper DataFrame initialization. if orient == "split": @@ -857,7 +876,8 @@ def test_dataframe(self, orient, numpy): elif orient == "index": df = df.transpose() - tm.assert_frame_equal(output, df, check_dtype=False) + assert (df.dtypes == dtype).all() + tm.assert_frame_equal(output, df) def test_dataframe_nested(self, orient): df = DataFrame( @@ -897,14 +917,20 @@ def test_dataframe_numpy_labelled(self, orient): tm.assert_frame_equal(output, df) def test_series(self, orient, numpy): + dtype = get_int32_compat_dtype(numpy, orient) s = Series( - [10, 20, 30, 40, 50, 60], name="series", index=[6, 7, 8, 9, 10, 15] + [10, 20, 30, 40, 50, 60], + name="series", + index=[6, 7, 8, 9, 10, 15], + dtype=dtype, ).sort_values() + assert s.dtype == dtype encode_kwargs = {} if orient is None else dict(orient=orient) decode_kwargs = {} if numpy is None else dict(numpy=numpy) output = ujson.decode(ujson.encode(s, **encode_kwargs), **decode_kwargs) + assert s.dtype == dtype if orient == "split": dec = _clean_dict(output) @@ -920,7 +946,8 @@ def test_series(self, orient, numpy): s.name = None s.index = [0, 1, 2, 3, 4, 5] - tm.assert_series_equal(output, s, check_dtype=False) + assert s.dtype == dtype + tm.assert_series_equal(output, s) def test_series_nested(self, orient): s = Series( diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 2702d378fd153..61ca2e7f5f19d 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -2298,9 +2298,7 @@ def test_index_types(self, setup_path): with catch_warnings(record=True): values = np.random.randn(2) - func = lambda l, r: tm.assert_series_equal( - l, r, check_dtype=True, check_index_type=True - ) + func = lambda l, r: tm.assert_series_equal(l, r, check_index_type=True) with catch_warnings(record=True): ser = Series(values, [0, "y"]) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 3458cfb6ad254..b627e0e1cad54 
100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -75,7 +75,11 @@ def df(request): ) elif data_type == "mixed": return DataFrame( - {"a": np.arange(1.0, 6.0) + 0.01, "b": np.arange(1, 6), "c": list("abcde")} + { + "a": np.arange(1.0, 6.0) + 0.01, + "b": np.arange(1, 6).astype(np.int64), + "c": list("abcde"), + } ) elif data_type == "float": return tm.makeCustomDataframe( @@ -146,7 +150,7 @@ class TestClipboard: def check_round_trip_frame(self, data, excel=None, sep=None, encoding=None): data.to_clipboard(excel=excel, sep=sep, encoding=encoding) result = read_clipboard(sep=sep or "\t", index_col=0, encoding=encoding) - tm.assert_frame_equal(data, result, check_dtype=False) + tm.assert_frame_equal(data, result) # Test that default arguments copy as tab delimited def test_round_trip_frame(self, df): diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 725157b7c8523..dc1efa46403be 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -298,7 +298,7 @@ def test_join_on_inner(self): expected = df.join(df2, on="key") expected = expected[expected["value"].notna()] - tm.assert_series_equal(joined["key"], expected["key"], check_dtype=False) + tm.assert_series_equal(joined["key"], expected["key"]) tm.assert_series_equal(joined["value"], expected["value"], check_dtype=False) tm.assert_index_equal(joined.index, expected.index) diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py index a503173bd74b1..8918d19e4ba7b 100644 --- a/pandas/tests/reshape/test_union_categoricals.py +++ b/pandas/tests/reshape/test_union_categoricals.py @@ -41,7 +41,7 @@ def test_union_categorical(self): for box in [Categorical, CategoricalIndex, Series]: result = union_categoricals([box(Categorical(a)), box(Categorical(b))]) expected = Categorical(combined) - tm.assert_categorical_equal(result, expected, check_category_order=True) + tm.assert_categorical_equal(result, expected) # new categories ordered by appearance s = Categorical(["x", "y", "z"]) diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py index c7fe6ed19a2eb..4353eb4c8cd64 100644 --- a/pandas/tests/series/methods/test_argsort.py +++ b/pandas/tests/series/methods/test_argsort.py @@ -49,8 +49,8 @@ def test_argsort_stable(self): mexpected = np.argsort(s.values, kind="mergesort") qexpected = np.argsort(s.values, kind="quicksort") - tm.assert_series_equal(mindexer, Series(mexpected), check_dtype=False) - tm.assert_series_equal(qindexer, Series(qexpected), check_dtype=False) + tm.assert_series_equal(mindexer.astype(np.intp), Series(mexpected)) + tm.assert_series_equal(qindexer.astype(np.intp), Series(qexpected)) msg = ( r"ndarray Expected type , " r"found instead" diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 26eaf53616282..904a455870ab1 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -294,7 +294,7 @@ def test_replace_categorical(self, categorical, numeric): s = pd.Series(categorical) result = s.replace({"A": 1, "B": 2}) expected = pd.Series(numeric) - tm.assert_series_equal(expected, result, check_dtype=False) + tm.assert_series_equal(expected, result) def test_replace_categorical_single(self): # GH 26988 diff --git a/pandas/tests/series/test_duplicates.py b/pandas/tests/series/test_duplicates.py index 
3513db6177951..89181a08819b1 100644 --- a/pandas/tests/series/test_duplicates.py +++ b/pandas/tests/series/test_duplicates.py @@ -47,9 +47,9 @@ def test_unique(): # GH 18051 s = Series(Categorical([])) - tm.assert_categorical_equal(s.unique(), Categorical([]), check_dtype=False) + tm.assert_categorical_equal(s.unique(), Categorical([])) s = Series(Categorical([np.nan])) - tm.assert_categorical_equal(s.unique(), Categorical([np.nan]), check_dtype=False) + tm.assert_categorical_equal(s.unique(), Categorical([np.nan])) def test_unique_data_ownership(): From f35bcac3a8b962420056feb272236e7167b466b1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 14 Mar 2020 08:50:16 -0700 Subject: [PATCH 363/388] TST: fix test creating invalid CategoricalBlock (#32519) * TST: fix test creating invalid CategoricalBlock * ValueError->AssertionError --- pandas/core/internals/blocks.py | 4 ++++ pandas/tests/internals/test_internals.py | 13 ++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f07fa99fe57d6..83980e9028e9a 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1739,6 +1739,10 @@ def __init__(self, values, placement, ndim=None): values = self._maybe_coerce_values(values) super().__init__(values, placement, ndim) + if self.ndim == 2 and len(self.mgr_locs) != 1: + # TODO(2DEA): check unnecessary with 2D EAs + raise AssertionError("block.size != values.size") + def _maybe_coerce_values(self, values): """ Unbox to an extension array. diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 0bbea671bae19..1a7d5839d9a11 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -540,6 +540,13 @@ def _compare(old_mgr, new_mgr): assert new_mgr.get("g").dtype == np.float64 assert new_mgr.get("h").dtype == np.float16 + def test_invalid_ea_block(self): + with pytest.raises(AssertionError, match="block.size != values.size"): + create_mgr("a: category; b: category") + + with pytest.raises(AssertionError, match="block.size != values.size"): + create_mgr("a: category2; b: category2") + def test_interleave(self): # self @@ -552,14 +559,10 @@ def test_interleave(self): # will be converted according the actual dtype of the underlying mgr = create_mgr("a: category") assert mgr.as_array().dtype == "i8" - mgr = create_mgr("a: category; b: category") - assert mgr.as_array().dtype == "i8" mgr = create_mgr("a: category; b: category2") assert mgr.as_array().dtype == "object" mgr = create_mgr("a: category2") assert mgr.as_array().dtype == "object" - mgr = create_mgr("a: category2; b: category2") - assert mgr.as_array().dtype == "object" # combinations mgr = create_mgr("a: f8") @@ -702,7 +705,7 @@ def test_equals(self): "a:i8;b:f8", # basic case "a:i8;b:f8;c:c8;d:b", # many types "a:i8;e:dt;f:td;g:string", # more types - "a:i8;b:category;c:category2;d:category2", # categories + "a:i8;b:category;c:category2", # categories "c:sparse;d:sparse_na;b:f8", # sparse ], ) From 5c7a901d6cdf4b281412b3250cb7bdbe6a64b0ca Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Sat, 14 Mar 2020 17:54:55 +0200 Subject: [PATCH 364/388] PERF: Using Numpy C-API arange (#32681) --- pandas/_libs/internals.pyx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 437406cbbd819..7f861e587e637 100644 
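# (The change below swaps np.arange for its NumPy C-API equivalent when
# materializing a BlockPlacement; the two spellings produce the same
# result, e.g.:
#
#     >>> import numpy as np
#     >>> np.arange(0, 10, 2, dtype=np.int64)
#     array([0, 2, 4, 6, 8])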
--- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -7,7 +7,9 @@ cdef extern from "Python.h": Py_ssize_t PY_SSIZE_T_MAX import numpy as np -from numpy cimport int64_t +cimport numpy as cnp +from numpy cimport NPY_INT64, int64_t +cnp.import_array() from pandas._libs.algos import ensure_int64 @@ -105,7 +107,9 @@ cdef class BlockPlacement: Py_ssize_t start, stop, end, _ if not self._has_array: start, stop, step, _ = slice_get_indices_ex(self._as_slice) - self._as_array = np.arange(start, stop, step, dtype=np.int64) + # NOTE: this is the C-optimized equivalent of + # np.arange(start, stop, step, dtype=np.int64) + self._as_array = cnp.PyArray_Arange(start, stop, step, NPY_INT64) self._has_array = True return self._as_array From 8111d64c68b7b355471b14325d9d8aafc5618ff2 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 14 Mar 2020 16:29:23 +0000 Subject: [PATCH 365/388] PERF: MultiIndex._shallow_copy (#32669) --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/core/indexes/multi.py | 9 ++++++++- pandas/core/indexes/period.py | 6 ++++-- pandas/core/indexes/range.py | 6 ++++-- pandas/tests/indexes/common.py | 13 +++++++++++++ 5 files changed, 30 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 5b6f70be478c2..48eff0543ad4d 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -190,7 +190,7 @@ Performance improvements - Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`) - The internal index method :meth:`~Index._shallow_copy` now copies cached attributes over to the new index, avoiding creating these again on the new index. This can speed up many operations that depend on creating copies of - existing indexes (:issue:`28584`, :issue:`32640`) + existing indexes (:issue:`28584`, :issue:`32640`, :issue:`32669`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0bb88145646ed..122097f4478d7 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -276,6 +276,7 @@ def __new__( raise ValueError("Must pass non-zero number of levels/codes") result = object.__new__(MultiIndex) + result._cache = {} # we've already validated levels and codes, so shortcut here result._set_levels(levels, copy=copy, validate=False) @@ -991,7 +992,13 @@ def _shallow_copy(self, values=None, **kwargs): # discards freq kwargs.pop("freq", None) return MultiIndex.from_tuples(values, names=names, **kwargs) - return self.copy(**kwargs) + + result = self.copy(**kwargs) + result._cache = self._cache.copy() + # GH32669 + if "levels" in result._cache: + del result._cache["levels"] + return result def _shallow_copy_with_infer(self, values, **kwargs): # On equal MultiIndexes the difference is empty. 
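# (On the hunk above: the copied cache is reused to avoid rebuilding the
# engine, but the cached "levels" entry is dropped since it may not match
# the copied index (GH32669).  A rough sketch of the resulting behaviour,
# using internal attributes for illustration only:
#
#     >>> import pandas as pd
#     >>> mi = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)])
#     >>> "levels" in mi._shallow_copy()._cache
#     False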
diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 8ae792d3f63b5..f83234f1aac0b 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -251,11 +251,13 @@ def _has_complex_internals(self): def _shallow_copy(self, values=None, name: Label = no_default): name = name if name is not no_default else self.name - + cache = self._cache.copy() if values is None else {} if values is None: values = self._data - return self._simple_new(values, name=name) + result = self._simple_new(values, name=name) + result._cache = cache + return result def _maybe_convert_timedelta(self, other): """ diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index c21d8df2476b3..2c038564f4e6f 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -141,7 +141,7 @@ def _simple_new(cls, values: range, name: Label = None) -> "RangeIndex": result._range = values result.name = name - + result._cache = {} result._reset_identity() return result @@ -391,7 +391,9 @@ def _shallow_copy(self, values=None, name: Label = no_default): name = self.name if name is no_default else name if values is None: - return self._simple_new(self._range, name=name) + result = self._simple_new(self._range, name=name) + result._cache = self._cache.copy() + return result else: return Int64Index._simple_new(values, name=name) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index e5af0d9c03979..c13385c135e9f 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -919,3 +919,16 @@ def test_contains_requires_hashable_raises(self): with pytest.raises(TypeError): {} in idx._engine + + def test_shallow_copy_copies_cache(self): + # GH32669 + idx = self.create_index() + idx.get_loc(idx[0]) # populates the _cache. 
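# (The assertions below pin down the contract: equal cached values, but an
# independent dict, so mutating one index's cache cannot affect the other.)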
+ shallow_copy = idx._shallow_copy() + + # check that the shallow_copied cache is a copy of the original + assert idx._cache == shallow_copy._cache + assert idx._cache is not shallow_copy._cache + # cache values should reference the same object + for key, val in idx._cache.items(): + assert shallow_copy._cache[key] is val, key From 029f7073b1fa309923b493bbcff602d28057eb11 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 14 Mar 2020 09:33:19 -0700 Subject: [PATCH 366/388] Disallow lossy SparseArray conversion (#32501) --- pandas/core/arrays/sparse/array.py | 22 ++++++++++++++++++---- pandas/tests/arrays/sparse/test_array.py | 16 ++++++++++++++++ 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 3b5e6b2d2bcd3..93091555201e8 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -27,6 +27,7 @@ is_array_like, is_bool_dtype, is_datetime64_any_dtype, + is_datetime64tz_dtype, is_dtype_equal, is_integer, is_object_dtype, @@ -42,7 +43,7 @@ from pandas.core.arrays.sparse.dtype import SparseDtype from pandas.core.base import PandasObject import pandas.core.common as com -from pandas.core.construction import sanitize_array +from pandas.core.construction import extract_array, sanitize_array from pandas.core.indexers import check_array_indexer from pandas.core.missing import interpolate_2d import pandas.core.ops as ops @@ -312,7 +313,7 @@ def __init__( dtype = dtype.subtype if index is not None and not is_scalar(data): - raise Exception("must only pass scalars with an index ") + raise Exception("must only pass scalars with an index") if is_scalar(data): if index is not None: @@ -367,6 +368,19 @@ def __init__( sparse_index = data._sparse_index sparse_values = np.asarray(data.sp_values, dtype=dtype) elif sparse_index is None: + data = extract_array(data, extract_numpy=True) + if not isinstance(data, np.ndarray): + # EA + if is_datetime64tz_dtype(data.dtype): + warnings.warn( + f"Creating SparseArray from {data.dtype} data " + "loses timezone information. 
Cast to object before "
+                    "sparse to retain timezone information.",
+                    UserWarning,
+                    stacklevel=2,
+                )
+                data = np.asarray(data, dtype="datetime64[ns]")
+
+            data = np.asarray(data)
             sparse_values, sparse_index, fill_value = make_sparse(
                 data, kind=kind, fill_value=fill_value, dtype=dtype
             )
@@ -1497,7 +1511,7 @@ def _formatter(self, boxed=False):
 SparseArray._add_unary_ops()


-def make_sparse(arr, kind="block", fill_value=None, dtype=None, copy=False):
+def make_sparse(arr: np.ndarray, kind="block", fill_value=None, dtype=None, copy=False):
     """
     Convert ndarray to sparse format

@@ -1513,7 +1527,7 @@ def make_sparse(arr, kind="block", fill_value=None, dtype=None, copy=False):
     -------
     (sparse_values, index, fill_value) : (ndarray, SparseIndex, Scalar)
     """
-    arr = com.values_from_object(arr)
+    assert isinstance(arr, np.ndarray)

     if arr.ndim > 1:
         raise TypeError("expected dimension <= 1 data")
diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py
index baca18239b929..4dab86166e13c 100644
--- a/pandas/tests/arrays/sparse/test_array.py
+++ b/pandas/tests/arrays/sparse/test_array.py
@@ -96,6 +96,22 @@ def test_constructor_na_dtype(self, dtype):
         with pytest.raises(ValueError, match="Cannot convert"):
             SparseArray([0, 1, np.nan], dtype=dtype)

+    def test_constructor_warns_when_losing_timezone(self):
+        # GH#32501 warn when losing timezone information
+        dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
+
+        expected = SparseArray(np.asarray(dti, dtype="datetime64[ns]"))
+
+        with tm.assert_produces_warning(UserWarning):
+            result = SparseArray(dti)
+
+        tm.assert_sp_array_equal(result, expected)
+
+        with tm.assert_produces_warning(UserWarning):
+            result = SparseArray(pd.Series(dti))
+
+        tm.assert_sp_array_equal(result, expected)
+
     def test_constructor_spindex_dtype(self):
         arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))
         # XXX: Behavior change: specifying SparseIndex no longer changes the

From 98a6b3c56d64ef606e50a3c1a3a15ef6132042b3 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 14 Mar 2020 09:34:47 -0700
Subject: [PATCH 367/388] ENH: implement ExtensionIndex.insert (#32476)

---
 pandas/core/indexes/base.py      | 8 +++++---
 pandas/core/indexes/extension.py | 5 +++++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 23e68802eb126..31966489403f4 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -5195,9 +5195,11 @@ def insert(self, loc: int, item):
         -------
         new_index : Index
         """
-        _self = np.asarray(self)
-        item = self._coerce_scalar_to_index(item)._ndarray_values
-        idx = np.concatenate((_self[:loc], item, _self[loc:]))
+        # Note: this method is overridden by all ExtensionIndex subclasses,
+        # so self is never backed by an EA.
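+        # (EA-backed index subclasses instead go through ExtensionIndex.insert,
+        # added below, which raises AbstractMethodError until overridden)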
+ arr = np.asarray(self) + item = self._coerce_scalar_to_index(item)._values + idx = np.concatenate((arr[:loc], item, arr[loc:])) return self._shallow_copy_with_infer(idx) def drop(self, labels, errors: str_t = "raise"): diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 4984fc27516ff..6d5f0dbb830f9 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -6,6 +6,7 @@ import numpy as np from pandas.compat.numpy import function as nv +from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.common import ( @@ -248,6 +249,10 @@ def repeat(self, repeats, axis=None): result = self._data.repeat(repeats, axis=axis) return self._shallow_copy(result) + def insert(self, loc: int, item): + # ExtensionIndex subclasses must override Index.insert + raise AbstractMethodError(self) + def _concat_same_dtype(self, to_concat, name): arr = type(self._data)._concat_same_type(to_concat) return type(self)._simple_new(arr, name=name) From fd2e002e87eaabff3bd8d05bfaa037df468cd752 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 14 Mar 2020 09:47:44 -0700 Subject: [PATCH 368/388] ENH: Categorical.fillna allow Categorical/ndarray (#32420) --- pandas/core/algorithms.py | 18 +++++++++++------- pandas/core/arrays/categorical.py | 9 +++++++-- pandas/core/generic.py | 2 ++ pandas/core/internals/managers.py | 6 +----- .../tests/arrays/categorical/test_missing.py | 15 +++++++++++++++ pandas/tests/test_algos.py | 10 ++++++++++ 6 files changed, 46 insertions(+), 14 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index f9059054ba59f..1a51101bc8db8 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -11,6 +11,7 @@ from pandas._libs import Timestamp, algos, hashtable as htable, lib from pandas._libs.tslib import iNaT +from pandas._typing import AnyArrayLike from pandas.util._decorators import doc from pandas.core.dtypes.cast import ( @@ -45,10 +46,14 @@ is_unsigned_integer_dtype, needs_i8_conversion, ) -from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries +from pandas.core.dtypes.generic import ( + ABCExtensionArray, + ABCIndex, + ABCIndexClass, + ABCSeries, +) from pandas.core.dtypes.missing import isna, na_value_for_dtype -import pandas.core.common as com from pandas.core.construction import array, extract_array from pandas.core.indexers import validate_indices @@ -384,7 +389,7 @@ def unique(values): unique1d = unique -def isin(comps, values) -> np.ndarray: +def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: """ Compute the isin boolean array. 
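The annotation above is type-only; at runtime ``isin`` still takes two
array-likes and returns a boolean mask the length of ``comps``. A minimal
sketch of that contract, with illustrative inputs:

    >>> import numpy as np
    >>> from pandas.core import algorithms
    >>> algorithms.isin(np.array([1, 2, 3]), np.array([2, 4]))
    array([False,  True, False])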
@@ -409,15 +414,14 @@ def isin(comps, values) -> np.ndarray: f"to isin(), you passed a [{type(values).__name__}]" ) - if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)): + if not isinstance(values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)): values = construct_1d_object_array_from_listlike(list(values)) + comps = extract_array(comps, extract_numpy=True) if is_categorical_dtype(comps): # TODO(extension) # handle categoricals - return comps._values.isin(values) - - comps = com.values_from_object(comps) + return comps.isin(values) # type: ignore comps, dtype = _ensure_data(comps) values, _ = _ensure_data(values, dtype=dtype) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index bcb3fa53e311b..56ace48010108 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1733,12 +1733,17 @@ def fillna(self, value=None, method=None, limit=None): # If value is a dict or a Series (a dict value has already # been converted to a Series) - if isinstance(value, ABCSeries): - if not value[~value.isin(self.categories)].isna().all(): + if isinstance(value, (np.ndarray, Categorical, ABCSeries)): + # We get ndarray or Categorical if called via Series.fillna, + # where it will unwrap another aligned Series before getting here + + mask = ~algorithms.isin(value, self.categories) + if not isna(value[mask]).all(): raise ValueError("fill value must be in categories") values_codes = _get_codes_for_values(value, self.categories) indexer = np.where(codes == -1) + codes = codes.copy() codes[indexer] = values_codes[indexer] # If value is not a dict or Series it should be a scalar diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e9811e0f60c7d..8d56311331d4d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6089,6 +6089,8 @@ def fillna( value = create_series_with_explicit_dtype( value, dtype_if_empty=object ) + value = value.reindex(self.index, copy=False) + value = value._values elif not is_list_like(value): pass else: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index b434f8dbde4eb..93d4b02310d54 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -391,6 +391,7 @@ def apply(self: T, f, filter=None, **kwargs) -> T: BlockManager """ result_blocks = [] + # fillna: Series/DataFrame is responsible for making sure value is aligned # filter kwarg is used in replace-* family of methods if filter is not None: @@ -415,11 +416,6 @@ def apply(self: T, f, filter=None, **kwargs) -> T: align_keys = ["new", "mask"] else: align_keys = ["mask"] - elif f == "fillna": - # fillna internally does putmask, maybe it's better to do this - # at mgr, not block level? 
-            align_copy = False
-            align_keys = ["value"]
         else:
             align_keys = []
diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py
index 8889f45a84237..9eb3c8b3a8c48 100644
--- a/pandas/tests/arrays/categorical/test_missing.py
+++ b/pandas/tests/arrays/categorical/test_missing.py
@@ -82,3 +82,18 @@ def test_fillna_iterable_category(self, named):
         expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)])

         tm.assert_categorical_equal(result, expected)
+
+    def test_fillna_array(self):
+        # accept Categorical or ndarray value if it holds appropriate values
+        cat = Categorical(["A", "B", "C", None, None])
+
+        other = cat.fillna("C")
+        result = cat.fillna(other)
+        tm.assert_categorical_equal(result, other)
+        assert isna(cat[-1])  # didn't modify the original in place
+
+        other = np.array(["A", "B", "C", "B", "A"])
+        result = cat.fillna(other)
+        expected = Categorical(["A", "B", "C", "B", "A"], dtype=cat.dtype)
+        tm.assert_categorical_equal(result, expected)
+        assert isna(cat[-1])  # didn't modify the original in place
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index a1de9c435c9ba..ad7028702ec8c 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -760,6 +760,16 @@ def test_categorical_from_codes(self):
         result = algos.isin(Sd, St)
         tm.assert_numpy_array_equal(expected, result)

+    def test_categorical_isin(self):
+        vals = np.array([0, 1, 2, 0])
+        cats = ["a", "b", "c"]
+        cat = Categorical(1).from_codes(vals, cats)
+        other = Categorical(1).from_codes(np.array([0, 1]), cats)
+
+        expected = np.array([True, True, False, True])
+        result = algos.isin(cat, other)
+        tm.assert_numpy_array_equal(expected, result)
+
     def test_same_nan_is_in(self):
         # GH 22160
         # nan is special, because from "a is b" it doesn't follow that "a == b"

From aa9a6dc5b77f388c37aab5178c690cea92da25c3 Mon Sep 17 00:00:00 2001
From: Farhan Reynaldo
Date: Sat, 14 Mar 2020 23:50:21 +0700
Subject: [PATCH 369/388] DOC: Fix EX01 in pandas.DataFrame.idxmin (#32697)

---
 pandas/core/frame.py | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e153fdaac16e2..b0909e23b44c5 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -8020,6 +8020,35 @@ def idxmin(self, axis=0, skipna=True) -> Series:
         Notes
         -----
         This method is the DataFrame version of ``ndarray.argmin``.
+
+        Examples
+        --------
+        Consider a dataset containing food consumption in Argentina.
+
+        >>> df = pd.DataFrame({'consumption': [10.51, 103.11, 55.48],
+        ...                    'co2_emissions': [37.2, 19.66, 1712]},
+        ...                   index=['Pork', 'Wheat Products', 'Beef'])
+
+        >>> df
+                        consumption  co2_emissions
+        Pork                  10.51          37.20
+        Wheat Products       103.11          19.66
+        Beef                  55.48        1712.00
+
+        By default, it returns the index for the minimum value in each column.
+
+        >>> df.idxmin()
+        consumption                Pork
+        co2_emissions    Wheat Products
+        dtype: object
+
+        To return the index for the minimum value in each row, use ``axis="columns"``.
+ + >>> df.idxmin(axis="columns") + Pork consumption + Wheat Products co2_emissions + Beef consumption + dtype: object """ axis = self._get_axis_number(axis) indices = nanops.nanargmin(self.values, axis=axis, skipna=skipna) From 427af4d14b0ea6f08df839464a02daea59402f3d Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 14 Mar 2020 19:30:01 +0000 Subject: [PATCH 370/388] CI: Update pipelines config to trigger on PRs (#32706) --- azure-pipelines.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 42a039af46e94..d042bda77d4e8 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -1,4 +1,10 @@ # Adapted from https://github.com/numba/numba/blob/master/azure-pipelines.yml +trigger: +- master + +pr: +- master + jobs: # Mac and Linux use the same template - template: ci/azure/posix.yml From b32c4e8feffeaaf9bbc65f6be88d15c2cfa94bcc Mon Sep 17 00:00:00 2001 From: William Ayd Date: Sat, 14 Mar 2020 12:39:08 -0700 Subject: [PATCH 371/388] skip 32 bit linux (#32708) --- ci/azure/posix.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index c9a2e4eefd19d..437cc9b161e8a 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -38,11 +38,11 @@ jobs: LC_ALL: "it_IT.utf8" EXTRA_APT: "language-pack-it xsel" - py36_32bit: - ENV_FILE: ci/deps/azure-36-32bit.yaml - CONDA_PY: "36" - PATTERN: "not slow and not network and not clipboard" - BITS32: "yes" + #py36_32bit: + # ENV_FILE: ci/deps/azure-36-32bit.yaml + # CONDA_PY: "36" + # PATTERN: "not slow and not network and not clipboard" + # BITS32: "yes" py37_locale: ENV_FILE: ci/deps/azure-37-locale.yaml From d6916330dc3ffb7717a7479593ea44389425d856 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Sat, 14 Mar 2020 14:40:59 -0500 Subject: [PATCH 372/388] CLN: Clean frame/test_constructors.py (#32610) --- pandas/tests/frame/test_constructors.py | 106 ++++++++++++------------ 1 file changed, 52 insertions(+), 54 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 172800e74d181..d938c0f6f1066 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -47,15 +47,15 @@ class TestDataFrameConstructors: def test_series_with_name_not_matching_column(self): # GH#9232 - x = pd.Series(range(5), name=1) - y = pd.Series(range(5), name=0) + x = Series(range(5), name=1) + y = Series(range(5), name=0) - result = pd.DataFrame(x, columns=[0]) - expected = pd.DataFrame([], columns=[0]) + result = DataFrame(x, columns=[0]) + expected = DataFrame([], columns=[0]) tm.assert_frame_equal(result, expected) - result = pd.DataFrame(y, columns=[1]) - expected = pd.DataFrame([], columns=[1]) + result = DataFrame(y, columns=[1]) + expected = DataFrame([], columns=[1]) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -126,7 +126,7 @@ def test_constructor_cast_failure(self): def test_constructor_dtype_copy(self): orig_df = DataFrame({"col1": [1.0], "col2": [2.0], "col3": [3.0]}) - new_df = pd.DataFrame(orig_df, dtype=float, copy=True) + new_df = DataFrame(orig_df, dtype=float, copy=True) new_df["col1"] = 200.0 assert orig_df["col1"][0] == 1.0 @@ -220,10 +220,10 @@ def test_constructor_rec(self, float_frame): index = float_frame.index df = DataFrame(rec) - tm.assert_index_equal(df.columns, pd.Index(rec.dtype.names)) + tm.assert_index_equal(df.columns, Index(rec.dtype.names)) df2 = 
DataFrame(rec, index=index) - tm.assert_index_equal(df2.columns, pd.Index(rec.dtype.names)) + tm.assert_index_equal(df2.columns, Index(rec.dtype.names)) tm.assert_index_equal(df2.index, index) rng = np.arange(len(rec))[::-1] @@ -298,7 +298,7 @@ def test_constructor_dict(self): tm.assert_series_equal(frame["col1"], datetime_series.rename("col1")) - exp = pd.Series( + exp = Series( np.concatenate([[np.nan] * 5, datetime_series_short.values]), index=datetime_series.index, name="col2", @@ -325,7 +325,7 @@ def test_constructor_dict(self): # Length-one dict micro-optimization frame = DataFrame({"A": {"1": 1, "2": 2}}) - tm.assert_index_equal(frame.index, pd.Index(["1", "2"])) + tm.assert_index_equal(frame.index, Index(["1", "2"])) # empty dict plus index idx = Index([0, 1, 2]) @@ -418,8 +418,8 @@ def test_constructor_dict_order_insertion(self): def test_constructor_dict_nan_key_and_columns(self): # GH 16894 - result = pd.DataFrame({np.nan: [1, 2], 2: [2, 3]}, columns=[np.nan, 2]) - expected = pd.DataFrame([[1, 2], [2, 3]], columns=[np.nan, 2]) + result = DataFrame({np.nan: [1, 2], 2: [2, 3]}, columns=[np.nan, 2]) + expected = DataFrame([[1, 2], [2, 3]], columns=[np.nan, 2]) tm.assert_frame_equal(result, expected) def test_constructor_multi_index(self): @@ -428,29 +428,29 @@ def test_constructor_multi_index(self): tuples = [(2, 3), (3, 3), (3, 3)] mi = MultiIndex.from_tuples(tuples) df = DataFrame(index=mi, columns=mi) - assert pd.isna(df).values.ravel().all() + assert isna(df).values.ravel().all() tuples = [(3, 3), (2, 3), (3, 3)] mi = MultiIndex.from_tuples(tuples) df = DataFrame(index=mi, columns=mi) - assert pd.isna(df).values.ravel().all() + assert isna(df).values.ravel().all() def test_constructor_2d_index(self): # GH 25416 # handling of 2d index in construction - df = pd.DataFrame([[1]], columns=[[1]], index=[1, 2]) - expected = pd.DataFrame( + df = DataFrame([[1]], columns=[[1]], index=[1, 2]) + expected = DataFrame( [1, 1], index=pd.Int64Index([1, 2], dtype="int64"), - columns=pd.MultiIndex(levels=[[1]], codes=[[0]]), + columns=MultiIndex(levels=[[1]], codes=[[0]]), ) tm.assert_frame_equal(df, expected) - df = pd.DataFrame([[1]], columns=[[1]], index=[[1, 2]]) - expected = pd.DataFrame( + df = DataFrame([[1]], columns=[[1]], index=[[1, 2]]) + expected = DataFrame( [1, 1], - index=pd.MultiIndex(levels=[[1, 2]], codes=[[0, 1]]), - columns=pd.MultiIndex(levels=[[1]], codes=[[0]]), + index=MultiIndex(levels=[[1, 2]], codes=[[0, 1]]), + columns=MultiIndex(levels=[[1]], codes=[[0]]), ) tm.assert_frame_equal(df, expected) @@ -471,7 +471,7 @@ def test_constructor_error_msgs(self): DataFrame( np.arange(12).reshape((4, 3)), columns=["foo", "bar", "baz"], - index=pd.date_range("2000-01-01", periods=3), + index=date_range("2000-01-01", periods=3), ) arr = np.array([[4, 5, 6]]) @@ -713,14 +713,12 @@ def test_constructor_period(self): # PeriodIndex a = pd.PeriodIndex(["2012-01", "NaT", "2012-04"], freq="M") b = pd.PeriodIndex(["2012-02-01", "2012-03-01", "NaT"], freq="D") - df = pd.DataFrame({"a": a, "b": b}) + df = DataFrame({"a": a, "b": b}) assert df["a"].dtype == a.dtype assert df["b"].dtype == b.dtype # list of periods - df = pd.DataFrame( - {"a": a.astype(object).tolist(), "b": b.astype(object).tolist()} - ) + df = DataFrame({"a": a.astype(object).tolist(), "b": b.astype(object).tolist()}) assert df["a"].dtype == a.dtype assert df["b"].dtype == b.dtype @@ -882,8 +880,8 @@ def test_constructor_maskedarray_nonfloat(self): def test_constructor_maskedarray_hardened(self): # Check numpy masked arrays 
with hard masks -- from GH24574
         mat_hard = ma.masked_all((2, 2), dtype=float).harden_mask()
-        result = pd.DataFrame(mat_hard, columns=["A", "B"], index=[1, 2])
-        expected = pd.DataFrame(
+        result = DataFrame(mat_hard, columns=["A", "B"], index=[1, 2])
+        expected = DataFrame(
             {"A": [np.nan, np.nan], "B": [np.nan, np.nan]},
             columns=["A", "B"],
             index=[1, 2],
@@ -892,8 +890,8 @@ def test_constructor_maskedarray_hardened(self):
         tm.assert_frame_equal(result, expected)
         # Check case where mask is hard but no data are masked
         mat_hard = ma.ones((2, 2), dtype=float).harden_mask()
-        result = pd.DataFrame(mat_hard, columns=["A", "B"], index=[1, 2])
-        expected = pd.DataFrame(
+        result = DataFrame(mat_hard, columns=["A", "B"], index=[1, 2])
+        expected = DataFrame(
             {"A": [1.0, 1.0], "B": [1.0, 1.0]},
             columns=["A", "B"],
             index=[1, 2],
@@ -907,8 +905,8 @@ def test_constructor_maskedrecarray_dtype(self):
             np.ma.zeros(5, dtype=[("date", "<f8"), ("price", "<f8")]),

Date: Sat, 14 Mar 2020 21:12:53 +0100
Subject: [PATCH 373/388] CLN: Remove redundant index test from tests/base/test_ops.py (#32484)

---
 pandas/tests/base/test_ops.py             | 17 -------
 pandas/tests/indexing/test_na_indexing.py | 58 ++++++++++++-----------
 2 files changed, 30 insertions(+), 45 deletions(-)

diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py
index 5fb5072e5c9d9..5d666bbae2d6f 100644
--- a/pandas/tests/base/test_ops.py
+++ b/pandas/tests/base/test_ops.py
@@ -834,23 +834,6 @@ def test_access_by_position(self, indices):
         with pytest.raises(IndexError, match=msg):
             series.iloc[size]

-    @pytest.mark.parametrize("indexer_klass", [list, pd.Index])
-    @pytest.mark.parametrize(
-        "indexer",
-        [
-            [True] * 10,
-            [False] * 10,
-            [True, False, True, True, False, False, True, True, False, True],
-        ],
-    )
-    def test_bool_indexing(self, indexer_klass, indexer):
-        # GH 22533
-        for idx in self.indexes:
-            exp_idx = [i for i in range(len(indexer)) if indexer[i]]
-            tm.assert_index_equal(idx[indexer_klass(indexer)], idx[exp_idx])
-            s = pd.Series(idx)
-            tm.assert_series_equal(s[indexer_klass(indexer)], s.iloc[exp_idx])
-
     def test_get_indexer_non_unique_dtype_mismatch(self):
         # GH 25459
         indexes, missing = pd.Index(["A", "B"]).get_indexer_non_unique(pd.Index([0]))
diff --git a/pandas/tests/indexing/test_na_indexing.py b/pandas/tests/indexing/test_na_indexing.py
index 345ca30ec77eb..9e8ef6e6e1c22 100644
--- a/pandas/tests/indexing/test_na_indexing.py
+++ b/pandas/tests/indexing/test_na_indexing.py
@@ -7,6 +7,7 @@
 @pytest.mark.parametrize(
     "values, dtype",
     [
+        ([], "object"),
         ([1, 2, 3], "int64"),
         ([1.0, 2.0, 3.0], "float64"),
         (["a", "b", "c"], "object"),
@@ -22,42 +23,43 @@
 @pytest.mark.parametrize(
     "mask", [[True, False, False], [True, True, True], [False, False, False]]
 )
-@pytest.mark.parametrize("box_mask", [True, False])
+@pytest.mark.parametrize("indexer_class", [list, pd.array, pd.Index, pd.Series])
 @pytest.mark.parametrize("frame", [True, False])
-def test_series_mask_boolean(values, dtype, mask, box_mask, frame):
-    ser = pd.Series(values, dtype=dtype, index=["a", "b", "c"])
-    if frame:
-        ser = ser.to_frame()
-    mask = pd.array(mask, dtype="boolean")
-    if box_mask:
-        mask = pd.Series(mask, index=ser.index)
-
-    expected = ser[mask.astype("bool")]
+def test_series_mask_boolean(values, dtype, mask, indexer_class, frame):
+    # In case len(values) < 3
+    index = ["a", "b", "c"][: len(values)]
+    mask = mask[: len(values)]

-    result = ser[mask]
-    tm.assert_equal(result, expected)
-
-    if not box_mask:
-        # Series.iloc[Series[bool]] isn't allowed
-        result = ser.iloc[mask]
-        tm.assert_equal(result, expected)

+    obj = pd.Series(values, dtype=dtype, index=index)
+    if frame:
+        if len(values) == 0:
+            # Otherwise obj is an empty DataFrame with shape (0, 1)
+            obj = pd.DataFrame(dtype=dtype)
+        else:
+            obj = obj.to_frame()
+
+    if indexer_class is pd.array:
+        mask = pd.array(mask, dtype="boolean")
+    elif indexer_class is pd.Series:
+        mask = pd.Series(mask, index=obj.index, dtype="boolean")
+    else:
+        mask = indexer_class(mask)

-    result = ser.loc[mask]
-    tm.assert_equal(result, expected)
+    expected = obj[mask]

-    # empty
-    mask = mask[:0]
-    ser = ser.iloc[:0]
-    expected = ser[mask.astype("bool")]
-    result = ser[mask]
+    result = obj[mask]
     tm.assert_equal(result, expected)

-    if not box_mask:
-        # Series.iloc[Series[bool]] isn't allowed
-        result = ser.iloc[mask]
+    if indexer_class is pd.Series:
+        msg = "iLocation based boolean indexing cannot use an indexable as a mask"
+        with pytest.raises(ValueError, match=msg):
+            result = obj.iloc[mask]
+        tm.assert_equal(result, expected)
+    else:
+        result = obj.iloc[mask]
         tm.assert_equal(result, expected)

-    result = ser.loc[mask]
+    result = obj.loc[mask]
     tm.assert_equal(result, expected)

From 1a5b11d4ad8292a2c8393240e05822fcbf476ab5 Mon Sep 17 00:00:00 2001
From: kernc
Date: Sat, 14 Mar 2020 21:21:11 +0100
Subject: [PATCH 374/388] BUG: Fix DataFrame.apply(..., raw=True) not calling with raw array (#32425)

---
 doc/source/whatsnew/v1.1.0.rst   |  1 +
 pandas/core/apply.py             |  2 +-
 pandas/tests/frame/test_apply.py | 13 ++++++++++++-
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 48eff0543ad4d..570c69ed50413 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -335,6 +335,7 @@ Reshaping
 - Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`)
 - :meth:`Series.append` will now raise a ``TypeError`` when passed a DataFrame or a sequence containing Dataframe (:issue:`31413`)
 - :meth:`DataFrame.replace` and :meth:`Series.replace` will raise a ``TypeError`` if ``to_replace`` is not an expected type. Previously the ``replace`` would fail silently (:issue:`18634`)
+- Bug in :meth:`DataFrame.apply` where the callback was called with a :class:`Series` parameter even though ``raw=True`` was requested.
(:issue:`32423`) Sparse diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 70e0a129c055f..ceb45bc71326e 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -179,7 +179,7 @@ def get_result(self): return self.apply_empty_result() # raw - elif self.raw and not self.obj._is_mixed_type: + elif self.raw: return self.apply_raw() return self.apply_standard() diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 11705cd77a325..ee3cd59c27b44 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -235,7 +235,14 @@ def test_apply_broadcast_error(self, int_frame_const_col): with pytest.raises(ValueError): df.apply(lambda x: Series([1, 2]), axis=1, result_type="broadcast") - def test_apply_raw(self, float_frame): + def test_apply_raw(self, float_frame, mixed_type_frame): + def _assert_raw(x): + assert isinstance(x, np.ndarray) + assert x.ndim == 1 + + float_frame.apply(_assert_raw, raw=True) + float_frame.apply(_assert_raw, axis=1, raw=True) + result0 = float_frame.apply(np.mean, raw=True) result1 = float_frame.apply(np.mean, axis=1, raw=True) @@ -250,6 +257,10 @@ def test_apply_raw(self, float_frame): expected = float_frame * 2 tm.assert_frame_equal(result, expected) + # Mixed dtype (GH-32423) + mixed_type_frame.apply(_assert_raw, raw=True) + mixed_type_frame.apply(_assert_raw, axis=1, raw=True) + def test_apply_axis1(self, float_frame): d = float_frame.index[0] tapplied = float_frame.apply(np.mean, axis=1) From 810a4e5b19616efb503767b4518083c9a59c11e6 Mon Sep 17 00:00:00 2001 From: How Si Wei Date: Sun, 15 Mar 2020 04:31:21 +0800 Subject: [PATCH 375/388] BUG: assignment to multiple columns when some column do not exist (#29334) --- doc/source/whatsnew/v1.1.0.rst | 31 ++++++++++ pandas/core/frame.py | 1 + pandas/core/indexing.py | 40 ++++++++++++ pandas/tests/frame/indexing/test_indexing.py | 64 +++++++++++++++++--- pandas/tests/indexing/test_loc.py | 58 ++++++++++++++++++ 5 files changed, 187 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 570c69ed50413..85fe33b7c83e8 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -168,6 +168,37 @@ key and type of :class:`Index`. These now consistently raise ``KeyError`` (:iss .. --------------------------------------------------------------------------- +.. _whatsnew_110.api_breaking.assignment_to_multiple_columns: + +Assignment to multiple columns of a DataFrame when some columns do not exist +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Assignment to multiple columns of a :class:`DataFrame` when some of the columns do not exist would previously assign the values to the last column. Now, new columns would be constructed with the right values. (:issue:`13658`) + +.. ipython:: python + + df = pd.DataFrame({'a': [0, 1, 2], 'b': [3, 4, 5]}) + df + +*Previous behavior*: + +.. code-block:: ipython + + In [3]: df[['a', 'c']] = 1 + In [4]: df + Out[4]: + a b + 0 1 1 + 1 1 1 + 2 1 1 + +*New behavior*: + +.. ipython:: python + + df[['a', 'c']] = 1 + df + .. 
_whatsnew_110.deprecations: Deprecations diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b0909e23b44c5..179857da4d50c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2687,6 +2687,7 @@ def _setitem_array(self, key, value): for k1, k2 in zip(key, value.columns): self[k1] = value[k2] else: + self.loc._ensure_listlike_indexer(key, axis=1) indexer = self.loc._get_listlike_indexer( key, axis=1, raise_missing=False )[1] diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c9362a0527c06..7bd25814a12ee 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -8,6 +8,7 @@ from pandas.util._decorators import Appender from pandas.core.dtypes.common import ( + is_hashable, is_integer, is_iterator, is_list_like, @@ -581,6 +582,9 @@ def _get_setitem_indexer(self, key): """ Convert a potentially-label-based key into a positional indexer. """ + if self.name == "loc": + self._ensure_listlike_indexer(key) + if self.axis is not None: return self._convert_tuple(key, is_setter=True) @@ -611,6 +615,42 @@ def _get_setitem_indexer(self, key): raise raise IndexingError(key) from e + def _ensure_listlike_indexer(self, key, axis=None): + """ + Ensure that a list-like of column labels are all present by adding them if + they do not already exist. + + Parameters + ---------- + key : _LocIndexer key or list-like of column labels + Target labels. + axis : key axis if known + """ + column_axis = 1 + + # column only exists in 2-dimensional DataFrame + if self.ndim != 2: + return + + if isinstance(key, tuple): + # key may be a tuple if key is a _LocIndexer key + # in that case, set key to the column part of key + key = key[column_axis] + axis = column_axis + + if ( + axis == column_axis + and not isinstance(self.obj.columns, ABCMultiIndex) + and is_list_like_indexer(key) + and not com.is_bool_indexer(key) + and all(is_hashable(k) for k in key) + ): + for k in key: + try: + self.obj[k] + except KeyError: + self.obj[k] = np.nan + def __setitem__(self, key, value): if isinstance(key, tuple): key = tuple(com.apply_if_callable(x, self.obj) for x in key) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 7892030a6727e..923447889d04c 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -215,6 +215,63 @@ def test_setitem_list_of_tuples(self, float_frame): expected = Series(tuples, index=float_frame.index, name="tuples") tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "columns,box,expected", + [ + ( + ["A", "B", "C", "D"], + 7, + pd.DataFrame( + [[7, 7, 7, 7], [7, 7, 7, 7], [7, 7, 7, 7]], + columns=["A", "B", "C", "D"], + ), + ), + ( + ["C", "D"], + [7, 8], + pd.DataFrame( + [[1, 2, 7, 8], [3, 4, 7, 8], [5, 6, 7, 8]], + columns=["A", "B", "C", "D"], + ), + ), + ( + ["A", "B", "C"], + np.array([7, 8, 9], dtype=np.int64), + pd.DataFrame( + [[7, 8, 9], [7, 8, 9], [7, 8, 9]], columns=["A", "B", "C"] + ), + ), + ( + ["B", "C", "D"], + [[7, 8, 9], [10, 11, 12], [13, 14, 15]], + pd.DataFrame( + [[1, 7, 8, 9], [3, 10, 11, 12], [5, 13, 14, 15]], + columns=["A", "B", "C", "D"], + ), + ), + ( + ["C", "A", "D"], + np.array([[7, 8, 9], [10, 11, 12], [13, 14, 15]], dtype=np.int64), + pd.DataFrame( + [[8, 2, 7, 9], [11, 4, 10, 12], [14, 6, 13, 15]], + columns=["A", "B", "C", "D"], + ), + ), + ( + ["A", "C"], + pd.DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]), + pd.DataFrame( + [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", 
"C"] + ), + ), + ], + ) + def test_setitem_list_missing_columns(self, columns, box, expected): + # GH 29334 + df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"]) + df[columns] = box + tm.assert_frame_equal(df, expected) + def test_setitem_multi_index(self): # GH7655, test that assigning to a sub-frame of a frame # with multi-index columns aligns both rows and columns @@ -459,13 +516,6 @@ def test_setitem(self, float_frame): float_frame["col6"] = series tm.assert_series_equal(series, float_frame["col6"], check_names=False) - msg = ( - r"\"None of \[Float64Index\(\[.*dtype='float64'\)\] are in the " - r"\[columns\]\"" - ) - with pytest.raises(KeyError, match=msg): - float_frame[np.random.randn(len(float_frame) + 1)] = 1 - # set ndarray arr = np.random.randn(len(float_frame)) float_frame["col9"] = arr diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 3073fe085de15..a0edf78f79a53 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -631,6 +631,64 @@ def test_loc_setitem_with_scalar_index(self, indexer, value): assert is_scalar(result) and result == "Z" + @pytest.mark.parametrize( + "index,box,expected", + [ + ( + ([0, 2], ["A", "B", "C", "D"]), + 7, + pd.DataFrame( + [[7, 7, 7, 7], [3, 4, np.nan, np.nan], [7, 7, 7, 7]], + columns=["A", "B", "C", "D"], + ), + ), + ( + (1, ["C", "D"]), + [7, 8], + pd.DataFrame( + [[1, 2, np.nan, np.nan], [3, 4, 7, 8], [5, 6, np.nan, np.nan]], + columns=["A", "B", "C", "D"], + ), + ), + ( + (1, ["A", "B", "C"]), + np.array([7, 8, 9], dtype=np.int64), + pd.DataFrame( + [[1, 2, np.nan], [7, 8, 9], [5, 6, np.nan]], + columns=["A", "B", "C"], + ), + ), + ( + (slice(1, 3, None), ["B", "C", "D"]), + [[7, 8, 9], [10, 11, 12]], + pd.DataFrame( + [[1, 2, np.nan, np.nan], [3, 7, 8, 9], [5, 10, 11, 12]], + columns=["A", "B", "C", "D"], + ), + ), + ( + (slice(1, 3, None), ["C", "A", "D"]), + np.array([[7, 8, 9], [10, 11, 12]], dtype=np.int64), + pd.DataFrame( + [[1, 2, np.nan, np.nan], [8, 4, 7, 9], [11, 6, 10, 12]], + columns=["A", "B", "C", "D"], + ), + ), + ( + (slice(None, None, None), ["A", "C"]), + pd.DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]), + pd.DataFrame( + [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"] + ), + ), + ], + ) + def test_loc_setitem_missing_columns(self, index, box, expected): + # GH 29334 + df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"]) + df.loc[index] = box + tm.assert_frame_equal(df, expected) + def test_loc_coercion(self): # 12411 From 4603c6377099d98436d0b647a06cdb57bf95a01f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 14 Mar 2020 14:29:21 -0700 Subject: [PATCH 376/388] BUG: retain tz in to_records (#32535) * BUG: retain tz in to_records * whatsnew * woops * comment --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/frame.py | 6 ++++-- pandas/tests/frame/methods/test_to_records.py | 20 ++++++++++++++++++- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 85fe33b7c83e8..5b17eee9feef1 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -388,6 +388,7 @@ Other instead of ``TypeError: Can only append a Series if ignore_index=True or if the Series has a name`` (:issue:`30871`) - Set operations on an object-dtype :class:`Index` now always return object-dtype results (:issue:`31401`) - Bug in :meth:`AbstractHolidayCalendar.holidays` when no rules were defined (:issue:`31415`) +- Bug in 
:meth:`DataFrame.to_records` incorrectly losing timezone information in timezone-aware ``datetime64`` columns (:issue:`32535`)

 .. ---------------------------------------------------------------------------

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 179857da4d50c..72d9ef7d0d35f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1773,7 +1773,9 @@ def to_records(
         else:
             ix_vals = [self.index.values]

-        arrays = ix_vals + [self[c]._internal_get_values() for c in self.columns]
+        arrays = ix_vals + [
+            np.asarray(self.iloc[:, i]) for i in range(len(self.columns))
+        ]

         count = 0
         index_names = list(self.index.names)
@@ -1788,7 +1790,7 @@ def to_records(
             names = [str(name) for name in itertools.chain(index_names, self.columns)]
         else:
-            arrays = [self[c]._internal_get_values() for c in self.columns]
+            arrays = [np.asarray(self.iloc[:, i]) for i in range(len(self.columns))]
             names = [str(c) for c in self.columns]
             index_names = []
diff --git a/pandas/tests/frame/methods/test_to_records.py b/pandas/tests/frame/methods/test_to_records.py
index d0181f0309af1..34b323e55d8cd 100644
--- a/pandas/tests/frame/methods/test_to_records.py
+++ b/pandas/tests/frame/methods/test_to_records.py
@@ -3,7 +3,14 @@
 import numpy as np
 import pytest

-from pandas import CategoricalDtype, DataFrame, MultiIndex, Series, date_range
+from pandas import (
+    CategoricalDtype,
+    DataFrame,
+    MultiIndex,
+    Series,
+    Timestamp,
+    date_range,
+)
 import pandas._testing as tm

@@ -18,6 +25,17 @@ def test_to_records_dt64(self):
         result = df.to_records()["index"][0]
         assert expected == result

+    def test_to_records_dt64tz_column(self):
+        # GH#32535 don't lose tz in to_records
+        df = DataFrame({"A": date_range("2012-01-01", "2012-01-02", tz="US/Eastern")})
+
+        result = df.to_records()
+
+        assert result.dtype["A"] == object
+        val = result[0][1]
+        assert isinstance(val, Timestamp)
+        assert val == df.loc[0, "A"]
+
     def test_to_records_with_multindex(self):
         # GH#3189
         index = [

From 9e9785b8c7c826db305121dc526ee30e1c618b1b Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 14 Mar 2020 14:31:17 -0700
Subject: [PATCH 377/388] CLN: avoid Block.get_values where it is unnecessary (#32707)

---
 pandas/core/internals/blocks.py | 6 ++++--
 pandas/core/internals/concat.py | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 83980e9028e9a..1a92a9486e9e4 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -605,7 +605,9 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"):
         # astype formatting
         else:
-            values = self.get_values()
+            # Because we have neither is_extension nor is_datelike,
+            # self.values already has the correct shape
+            values = self.values

         else:
             values = self.get_values(dtype=dtype)
@@ -663,7 +665,7 @@ def _can_hold_element(self, element: Any) -> bool:
     def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs):
         """ convert to our native types format, slicing if desired """
-        values = self.get_values()
+        values = self.values

         if slicer is not None:
             values = values[:, slicer]
diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index 7570f6eddbd9c..6839d138fbf73 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -217,7 +217,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
         else:
             # No dtype upcasting is done here, it will be performed during
             # concatenation itself.
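             # (for extension blocks, ``self.block.values`` keeps the
             # ExtensionArray intact, whereas ``get_values()`` would densify
             # it to an ndarray up front)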
-            values = self.block.get_values()
+            values = self.block.values

         if not self.indexers:
             # If there's no indexing to be done, we want to signal outside

From e734449c95497d0e79d7529d94d82b4043a68f8b Mon Sep 17 00:00:00 2001
From: elmonsomiat
Date: Sat, 14 Mar 2020 21:33:05 +0000
Subject: [PATCH 378/388] Deprecate Aliases as orient Argument in DataFrame.to_dict (#32516)

---
 doc/source/whatsnew/v1.1.0.rst             |  2 +-
 pandas/core/frame.py                       | 50 +++++++++++++++++++---
 pandas/tests/frame/methods/test_to_dict.py | 21 ++++++---
 3 files changed, 60 insertions(+), 13 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 5b17eee9feef1..4e7bd5a2032a7 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -206,7 +206,7 @@ Deprecations
 - Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated, will raise in a future version. Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`)
 - :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`)
 - Setting values with ``.loc`` using a positional slice is deprecated and will raise in a future version. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`)
--
+- :meth:`DataFrame.to_dict` has deprecated accepting short names for ``orient``; the abbreviations will not be accepted in a future version (:issue:`32515`)

.. ---------------------------------------------------------------------------

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 72d9ef7d0d35f..069c99ea8c94b 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1401,11 +1401,45 @@ def to_dict(self, orient="dict", into=dict):
         )
         # GH16122
         into_c = com.standardize_mapping(into)
-        if orient.lower().startswith("d"):
+
+        orient = orient.lower()
+        # GH32515
+        if orient.startswith(("d", "l", "s", "r", "i")) and orient not in {
+            "dict",
+            "list",
+            "series",
+            "split",
+            "records",
+            "index",
+        }:
+            warnings.warn(
+                "Using short name for 'orient' is deprecated. Only the "
+                "options: ('dict', 'list', 'series', 'split', 'records', 'index') "
+                "will be used in a future version. 
Use one of the above " + "to silence this warning.", + FutureWarning, + ) + + if orient.startswith("d"): + orient = "dict" + elif orient.startswith("l"): + orient = "list" + elif orient.startswith("sp"): + orient = "split" + elif orient.startswith("s"): + orient = "series" + elif orient.startswith("r"): + orient = "records" + elif orient.startswith("i"): + orient = "index" + + if orient == "dict": return into_c((k, v.to_dict(into)) for k, v in self.items()) - elif orient.lower().startswith("l"): + + elif orient == "list": return into_c((k, v.tolist()) for k, v in self.items()) - elif orient.lower().startswith("sp"): + + elif orient == "split": return into_c( ( ("index", self.index.tolist()), @@ -1419,9 +1453,11 @@ def to_dict(self, orient="dict", into=dict): ), ) ) - elif orient.lower().startswith("s"): + + elif orient == "series": return into_c((k, com.maybe_box_datetimelike(v)) for k, v in self.items()) - elif orient.lower().startswith("r"): + + elif orient == "records": columns = self.columns.tolist() rows = ( dict(zip(columns, row)) @@ -1431,13 +1467,15 @@ def to_dict(self, orient="dict", into=dict): into_c((k, com.maybe_box_datetimelike(v)) for k, v in row.items()) for row in rows ] - elif orient.lower().startswith("i"): + + elif orient == "index": if not self.index.is_unique: raise ValueError("DataFrame index must be unique for orient='index'.") return into_c( (t[0], dict(zip(self.columns, t[1:]))) for t in self.itertuples(name=None) ) + else: raise ValueError(f"orient '{orient}' not understood") diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index cd9bd169322fd..f1656b46cf356 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -70,8 +70,17 @@ def test_to_dict_invalid_orient(self): with pytest.raises(ValueError, match=msg): df.to_dict(orient="xinvalid") + @pytest.mark.parametrize("orient", ["d", "l", "r", "sp", "s", "i"]) + def test_to_dict_short_orient_warns(self, orient): + # GH#32515 + df = DataFrame({"A": [0, 1]}) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + df.to_dict(orient=orient) + @pytest.mark.parametrize("mapping", [dict, defaultdict(list), OrderedDict]) def test_to_dict(self, mapping): + # orient= should only take the listed options + # see GH#32515 test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}} # GH#16122 @@ -81,19 +90,19 @@ def test_to_dict(self, mapping): for k2, v2 in v.items(): assert v2 == recons_data[k][k2] - recons_data = DataFrame(test_data).to_dict("l", mapping) + recons_data = DataFrame(test_data).to_dict("list", mapping) for k, v in test_data.items(): for k2, v2 in v.items(): assert v2 == recons_data[k][int(k2) - 1] - recons_data = DataFrame(test_data).to_dict("s", mapping) + recons_data = DataFrame(test_data).to_dict("series", mapping) for k, v in test_data.items(): for k2, v2 in v.items(): assert v2 == recons_data[k][k2] - recons_data = DataFrame(test_data).to_dict("sp", mapping) + recons_data = DataFrame(test_data).to_dict("split", mapping) expected_split = { "columns": ["A", "B"], "index": ["1", "2", "3"], @@ -101,7 +110,7 @@ def test_to_dict(self, mapping): } tm.assert_dict_equal(recons_data, expected_split) - recons_data = DataFrame(test_data).to_dict("r", mapping) + recons_data = DataFrame(test_data).to_dict("records", mapping) expected_records = [ {"A": 1.0, "B": "1"}, {"A": 2.0, "B": "2"}, @@ -113,7 +122,7 @@ def test_to_dict(self, mapping): tm.assert_dict_equal(l, r) # GH#10844 - 
recons_data = DataFrame(test_data).to_dict("i") + recons_data = DataFrame(test_data).to_dict("index") for k, v in test_data.items(): for k2, v2 in v.items(): @@ -121,7 +130,7 @@ def test_to_dict(self, mapping): df = DataFrame(test_data) df["duped"] = df[df.columns[0]] - recons_data = df.to_dict("i") + recons_data = df.to_dict("index") comp_data = test_data.copy() comp_data["duped"] = comp_data[df.columns[0]] for k, v in comp_data.items(): From 0ed6d538c38010bcbd540cd6413ae8e4b749d9e6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 14 Mar 2020 16:28:53 -0700 Subject: [PATCH 379/388] BUG: pivot_table losing tz (#32558) --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/indexes/multi.py | 1 + pandas/core/reshape/util.py | 25 +++++++++++++++++-------- pandas/tests/reshape/test_pivot.py | 8 ++++++++ pandas/tests/reshape/test_util.py | 16 ++++++++++++++++ 5 files changed, 43 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 4e7bd5a2032a7..9a3c6ebdc7284 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -367,6 +367,7 @@ Reshaping - :meth:`Series.append` will now raise a ``TypeError`` when passed a DataFrame or a sequence containing Dataframe (:issue:`31413`) - :meth:`DataFrame.replace` and :meth:`Series.replace` will raise a ``TypeError`` if ``to_replace`` is not an expected type. Previously the ``replace`` would fail silently (:issue:`18634`) - Bug in :meth:`DataFrame.apply` where callback was called with :class:`Series` parameter even though ``raw=True`` requested. (:issue:`32423`) +- Bug in :meth:`DataFrame.pivot_table` losing timezone information when creating a :class:`MultiIndex` level from a column with timezone-aware dtype (:issue:`32558`) Sparse diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 122097f4478d7..5bffc4ec552af 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -565,6 +565,7 @@ def from_product(cls, iterables, sortorder=None, names=lib.no_default): if names is lib.no_default: names = [getattr(it, "name", None) for it in iterables] + # codes are all ndarrays, so cartesian_product is lossless codes = cartesian_product(codes) return MultiIndex(levels, codes, sortorder=sortorder, names=names) diff --git a/pandas/core/reshape/util.py b/pandas/core/reshape/util.py index d8652c9b4fac9..7abb14303f8cc 100644 --- a/pandas/core/reshape/util.py +++ b/pandas/core/reshape/util.py @@ -2,8 +2,6 @@ from pandas.core.dtypes.common import is_list_like -import pandas.core.common as com - def cartesian_product(X): """ @@ -51,9 +49,20 @@ def cartesian_product(X): # if any factor is empty, the cartesian product is empty b = np.zeros_like(cumprodX) - return [ - np.tile( - np.repeat(np.asarray(com.values_from_object(x)), b[i]), np.product(a[i]) - ) - for i, x in enumerate(X) - ] + return [_tile_compat(np.repeat(x, b[i]), np.product(a[i])) for i, x in enumerate(X)] + + +def _tile_compat(arr, num: int): + """ + Index compat for np.tile. + + Notes + ----- + Does not support multi-dimensional `num`. 
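+
+    A minimal sketch of the Index branch (assumes ``import pandas as pd``):
+
+    >>> _tile_compat(pd.Index([1, 2]), 2)
+    Int64Index([1, 2, 1, 2], dtype='int64')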
+ """ + if isinstance(arr, np.ndarray): + return np.tile(arr, num) + + # Otherwise we have an Index + taker = np.tile(np.arange(len(arr)), num) + return arr.take(taker) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 75c3c565e9d58..cdb1a73abc431 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1026,6 +1026,14 @@ def test_pivot_table_multiindex_only(self, cols): tm.assert_frame_equal(result, expected) + def test_pivot_table_retains_tz(self): + dti = date_range("2016-01-01", periods=3, tz="Europe/Amsterdam") + df = DataFrame({"A": np.random.randn(3), "B": np.random.randn(3), "C": dti}) + result = df.pivot_table(index=["B", "C"], dropna=False) + + # check tz retention + assert result.index.levels[1].equals(dti) + def test_pivot_integer_columns(self): # caused by upstream bug in unstack diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py index cd518dda4edbf..9d074b5ade425 100644 --- a/pandas/tests/reshape/test_util.py +++ b/pandas/tests/reshape/test_util.py @@ -25,6 +25,22 @@ def test_datetimeindex(self): tm.assert_index_equal(result1, expected1) tm.assert_index_equal(result2, expected2) + def test_tzaware_retained(self): + x = date_range("2000-01-01", periods=2, tz="US/Pacific") + y = np.array([3, 4]) + result1, result2 = cartesian_product([x, y]) + + expected = x.repeat(2) + tm.assert_index_equal(result1, expected) + + def test_tzaware_retained_categorical(self): + x = date_range("2000-01-01", periods=2, tz="US/Pacific").astype("category") + y = np.array([3, 4]) + result1, result2 = cartesian_product([x, y]) + + expected = x.repeat(2) + tm.assert_index_equal(result1, expected) + def test_empty(self): # product of empty factors X = [[], [0, 1], []] From bab3b1fcbd2ca15fe62412bb34924864d6da4529 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 14 Mar 2020 17:31:43 -0700 Subject: [PATCH 380/388] REF: avoid runtime import of Index (#32710) --- pandas/core/algorithms.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 1a51101bc8db8..5b324bc5753ec 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -2025,9 +2025,7 @@ def sort_mixed(values): ) codes = ensure_platform_int(np.asarray(codes)) - from pandas import Index - - if not assume_unique and not Index(values).is_unique: + if not assume_unique and not len(unique(values)) == len(values): raise ValueError("values should be unique if codes is not None") if sorter is None: From e7e5b61c9c2e47ca506abfe532f0028a8016f4df Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 14 Mar 2020 17:34:00 -0700 Subject: [PATCH 381/388] BUG: DatetimeArray._from_sequence accepting bool dtype (#32668) --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/arrays/datetimes.py | 15 ++++++++++----- pandas/core/series.py | 4 ++-- pandas/core/tools/datetimes.py | 13 ++++++++++++- pandas/tests/arrays/test_datetimes.py | 19 +++++++++++++++++-- 5 files changed, 42 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 9a3c6ebdc7284..6443a037a83b5 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -247,6 +247,7 @@ Datetimelike - Bug in :class:`Timestamp` where constructing :class:`Timestamp` with dateutil timezone less than 128 nanoseconds before daylight saving time switch from winter to summer would result in nonexistent time (:issue:`31043`) - Bug in 
:meth:`Period.to_timestamp`, :meth:`Period.start_time` with microsecond frequency returning a timestamp one nanosecond earlier than the correct time (:issue:`31475`) - :class:`Timestamp` raising confusing error message when year, month or day is missing (:issue:`31200`) +- Bug in :class:`DatetimeIndex` constructor incorrectly accepting ``bool``-dtyped inputs (:issue:`32668`) Timedelta ^^^^^^^^^ diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 7223eda22b3d9..2110f782330fb 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -23,6 +23,7 @@ from pandas.core.dtypes.common import ( _INT64_DTYPE, _NS_DTYPE, + is_bool_dtype, is_categorical_dtype, is_datetime64_any_dtype, is_datetime64_dtype, @@ -1903,7 +1904,11 @@ def maybe_convert_dtype(data, copy): ------ TypeError : PeriodDType data is passed """ - if is_float_dtype(data): + if not hasattr(data, "dtype"): + # e.g. collections.deque + return data, copy + + if is_float_dtype(data.dtype): # Note: we must cast to datetime64[ns] here in order to treat these # as wall-times instead of UTC timestamps. data = data.astype(_NS_DTYPE) @@ -1911,24 +1916,24 @@ def maybe_convert_dtype(data, copy): # TODO: deprecate this behavior to instead treat symmetrically # with integer dtypes. See discussion in GH#23675 - elif is_timedelta64_dtype(data): + elif is_timedelta64_dtype(data.dtype) or is_bool_dtype(data.dtype): # GH#29794 enforcing deprecation introduced in GH#23539 raise TypeError(f"dtype {data.dtype} cannot be converted to datetime64[ns]") - elif is_period_dtype(data): + elif is_period_dtype(data.dtype): # Note: without explicitly raising here, PeriodIndex # test_setops.test_join_does_not_recur fails raise TypeError( "Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead" ) - elif is_categorical_dtype(data): + elif is_categorical_dtype(data.dtype): # GH#18664 preserve tz in going DTI->Categorical->DTI # TODO: cases where we need to do another pass through this func, # e.g. the categories are timedelta64s data = data.categories.take(data.codes, fill_value=NaT)._values copy = False - elif is_extension_array_dtype(data) and not is_datetime64tz_dtype(data): + elif is_extension_array_dtype(data.dtype) and not is_datetime64tz_dtype(data.dtype): # Includes categorical # TODO: We have no tests for these data = np.array(data, dtype=np.object_) diff --git a/pandas/core/series.py b/pandas/core/series.py index 2d8eb9b29498a..e120695cc83e8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2666,9 +2666,9 @@ def combine(self, other, func, fill_value=None) -> "Series": new_values = [func(lv, other) for lv in self._values] new_name = self.name - if is_categorical_dtype(self.values): + if is_categorical_dtype(self.dtype): pass - elif is_extension_array_dtype(self.values): + elif is_extension_array_dtype(self.dtype): # The function can return something of any type, so check # if the type is compatible with the calling EA. 
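            # (``try_cast_to_ea`` attempts ``_from_sequence`` on the result and
            # falls back to returning the values unchanged if that constructor
            # rejects them)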
new_values = try_cast_to_ea(self._values, new_values) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index c32b4d81c0988..8e38332e67439 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -359,7 +359,18 @@ def _convert_listlike_datetimes( # warn if passing timedelta64, raise for PeriodDtype # NB: this must come after unit transformation orig_arg = arg - arg, _ = maybe_convert_dtype(arg, copy=False) + try: + arg, _ = maybe_convert_dtype(arg, copy=False) + except TypeError: + if errors == "coerce": + result = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg)) + return DatetimeIndex(result, name=name) + elif errors == "ignore": + from pandas import Index + + result = Index(arg, name=name) + return result + raise arg = ensure_object(arg) require_iso8601 = False diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 2c26e72a245f7..7d80ad3d8c6be 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -89,11 +89,26 @@ def test_non_array_raises(self): with pytest.raises(ValueError, match="list"): DatetimeArray([1, 2, 3]) - def test_other_type_raises(self): + def test_bool_dtype_raises(self): + arr = np.array([1, 2, 3], dtype="bool") + with pytest.raises( ValueError, match="The dtype of 'values' is incorrect.*bool" ): - DatetimeArray(np.array([1, 2, 3], dtype="bool")) + DatetimeArray(arr) + + msg = r"dtype bool cannot be converted to datetime64\[ns\]" + with pytest.raises(TypeError, match=msg): + DatetimeArray._from_sequence(arr) + + with pytest.raises(TypeError, match=msg): + sequence_to_dt64ns(arr) + + with pytest.raises(TypeError, match=msg): + pd.DatetimeIndex(arr) + + with pytest.raises(TypeError, match=msg): + pd.to_datetime(arr) def test_incorrect_dtype_raises(self): with pytest.raises(ValueError, match="Unexpected value for 'dtype'."): From 41322cf31c38544c8d0816588aa07d821eab2a2e Mon Sep 17 00:00:00 2001 From: William Ayd Date: Sat, 14 Mar 2020 17:35:24 -0700 Subject: [PATCH 382/388] Simplified get_blkno_indexers (#32645) Co-authored-by: Parallels --- pandas/_libs/internals.pyx | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 7f861e587e637..5545302fcbfc4 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -1,4 +1,5 @@ import cython +from collections import defaultdict from cython import Py_ssize_t from cpython.slice cimport PySlice_GetIndicesEx @@ -373,8 +374,7 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True): Py_ssize_t i, start, stop, n, diff object blkno - list group_order - dict group_dict + object group_dict = defaultdict(list) int64_t[:] res_view n = blknos.shape[0] @@ -395,28 +395,16 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True): yield cur_blkno, slice(start, n) else: - group_order = [] - group_dict = {} - for i in range(1, n): if blknos[i] != cur_blkno: - if cur_blkno not in group_dict: - group_order.append(cur_blkno) - group_dict[cur_blkno] = [(start, i)] - else: - group_dict[cur_blkno].append((start, i)) + group_dict[cur_blkno].append((start, i)) start = i cur_blkno = blknos[i] - if cur_blkno not in group_dict: - group_order.append(cur_blkno) - group_dict[cur_blkno] = [(start, n)] - else: - group_dict[cur_blkno].append((start, n)) + group_dict[cur_blkno].append((start, n)) - for blkno in group_order: - slices = group_dict[blkno] + for blkno, slices in group_dict.items(): if 
len(slices) == 1: yield blkno, slice(slices[0][0], slices[0][1]) else: From 7a24c35c92f6618b0ab568397b60af5164806617 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 14 Mar 2020 17:36:14 -0700 Subject: [PATCH 383/388] DEPR: Categorical.to_dense (#32639) --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/arrays/categorical.py | 6 ++++++ pandas/tests/arrays/categorical/test_api.py | 14 ++++++++++---- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 6443a037a83b5..41b2b93f6fc69 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -207,6 +207,7 @@ Deprecations - :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`) - Setting values with ``.loc`` using a positional slice is deprecated and will raise in a future version. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`) - :meth:`DataFrame.to_dict` has deprecated accepting short names for ``orient`` in future versions (:issue:`32515`) +- :meth:`Categorical.to_dense` is deprecated and will be removed in a future version, use ``np.asarray(cat)`` instead (:issue:`32639`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 56ace48010108..8284a89a29b52 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1675,6 +1675,12 @@ def to_dense(self): ------- dense : array """ + warn( + "Categorical.to_dense is deprecated and will be removed in " + "a future version. Use np.asarray(cat) instead.", + FutureWarning, + stacklevel=2, + ) return np.asarray(self) def fillna(self, value=None, method=None, limit=None): diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index f49f70f5acf77..b99e172674f66 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -247,7 +247,7 @@ def test_set_categories(self): tm.assert_index_equal(c.categories, Index([1, 2, 3, 4])) exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) - tm.assert_numpy_array_equal(c.to_dense(), exp) + tm.assert_numpy_array_equal(np.asarray(c), exp) # all "pointers" to '4' must be changed from 3 to 0,... 
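         # (editorial comment: the stored codes are remapped, so np.asarray(c)
         # still gives [1, 2, 3, 4, 1]; only the category order, and with it
         # min/max, changes under the reversed categories)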
c = c.set_categories([4, 3, 2, 1]) @@ -260,7 +260,7 @@ def test_set_categories(self): # output is the same exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) - tm.assert_numpy_array_equal(c.to_dense(), exp) + tm.assert_numpy_array_equal(np.asarray(c), exp) assert c.min() == 4 assert c.max() == 1 @@ -268,13 +268,19 @@ def test_set_categories(self): c2 = c.set_categories([4, 3, 2, 1], ordered=False) assert not c2.ordered - tm.assert_numpy_array_equal(c.to_dense(), c2.to_dense()) + tm.assert_numpy_array_equal(np.asarray(c), np.asarray(c2)) # set_categories should pass thru the ordering c2 = c.set_ordered(False).set_categories([4, 3, 2, 1]) assert not c2.ordered - tm.assert_numpy_array_equal(c.to_dense(), c2.to_dense()) + tm.assert_numpy_array_equal(np.asarray(c), np.asarray(c2)) + + def test_to_dense_deprecated(self): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + + with tm.assert_produces_warning(FutureWarning): + cat.to_dense() @pytest.mark.parametrize( "values, categories, new_categories", From 2e114ce0f6aa26c8c6ca3257f975f47bde69649b Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 15 Mar 2020 01:37:41 +0100 Subject: [PATCH 384/388] BUG: Fix file descriptor leak (#32598) --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/io/parsers.py | 14 +++++++++----- pandas/tests/io/parser/test_common.py | 14 ++++++++++++++ 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 41b2b93f6fc69..27ace918a51f1 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -339,6 +339,7 @@ I/O - Bug in :meth:`read_csv` was raising `TypeError` when `sep=None` was used in combination with `comment` keyword (:issue:`31396`) - Bug in :class:`HDFStore` that caused it to set to ``int64`` the dtype of a ``datetime64`` column when reading a DataFrame in Python 3 from fixed format written in Python 2 (:issue:`31750`) - Bug in :meth:`read_excel` where a UTF-8 string with a high surrogate would cause a segmentation violation (:issue:`23809`) +- Bug in :meth:`read_csv` was causing a file descriptor leak on an empty file (:issue:`31488`) Plotting diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 50b5db0274aa5..52783b3a9e134 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2273,11 +2273,15 @@ def __init__(self, f, **kwds): # Get columns in two steps: infer from data, then # infer column indices from self.usecols if it is specified. self._col_indices = None - ( - self.columns, - self.num_original_columns, - self.unnamed_cols, - ) = self._infer_columns() + try: + ( + self.columns, + self.num_original_columns, + self.unnamed_cols, + ) = self._infer_columns() + except (TypeError, ValueError): + self.close() + raise # Now self.columns has the set of columns that we will process. # The original set is stored in self.original_columns. 
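The close-before-reraise pattern used above is the standard guard against leaking a file handle from a partially constructed reader: if ``__init__`` propagates an exception, the caller never receives an object to call ``close()`` on. A minimal, self-contained sketch of the idea follows; the ``Reader`` class and its attributes are illustrative only, not pandas internals:

    class Reader:
        def __init__(self, path):
            self._fh = open(path)  # handle acquired before any parsing
            try:
                self._header = self._fh.readline()
                if not self._header:
                    raise ValueError("No columns to parse from file")
            except ValueError:
                self._fh.close()  # release the handle before propagating
                raise

        def close(self):
            self._fh.close()
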
diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index b3aa1aa14a509..33460262a4430 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -15,6 +15,7 @@ from pandas._libs.tslib import Timestamp from pandas.errors import DtypeWarning, EmptyDataError, ParserError +import pandas.util._test_decorators as td from pandas import DataFrame, Index, MultiIndex, Series, compat, concat import pandas._testing as tm @@ -2079,3 +2080,16 @@ def test_integer_precision(all_parsers): result = parser.read_csv(StringIO(s), header=None)[4] expected = Series([4321583677327450765, 4321113141090630389], name=4) tm.assert_series_equal(result, expected) + + +def test_file_descriptor_leak(all_parsers): + # GH 31488 + + parser = all_parsers + with tm.ensure_clean() as path: + + def test(): + with pytest.raises(EmptyDataError, match="No columns to parse from file"): + parser.read_csv(path) + + td.check_file_leaks(test)() From 74c530642c11308df6162b4040ab04ac4e07db9f Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 15 Mar 2020 01:38:57 +0100 Subject: [PATCH 385/388] TST: Add extra test for pandas.to_numeric() for issue #32394 (#32560) --- pandas/tests/tools/test_to_numeric.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 19385e797467c..e0dfeac4ab475 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -627,3 +627,13 @@ def test_non_coerce_uint64_conflict(errors, exp): else: result = to_numeric(ser, errors=errors) tm.assert_series_equal(result, ser) + + +def test_failure_to_convert_uint64_string_to_NaN(): + # GH 32394 + result = to_numeric("uint64", errors="coerce") + assert np.isnan(result) + + ser = Series([32, 64, np.nan]) + result = to_numeric(pd.Series(["32", "64", "uint64"]), errors="coerce") + tm.assert_series_equal(result, ser) From 74f6579941fbe71cf7c033f53977047ac872e469 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Sat, 14 Mar 2020 19:41:33 -0500 Subject: [PATCH 386/388] BUG: Don't multiply sets during construction (#32594) --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/core/construction.py | 3 +++ pandas/tests/frame/test_constructors.py | 6 ++++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 27ace918a51f1..21e59805fa143 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -274,7 +274,7 @@ Conversion ^^^^^^^^^^ - Bug in :class:`Series` construction from NumPy array with big-endian ``datetime64`` dtype (:issue:`29684`) - Bug in :class:`Timedelta` construction with large nanoseconds keyword value (:issue:`32402`) -- +- Bug in :class:`DataFrame` construction where sets would be duplicated rather than raising (:issue:`32582`) Strings ^^^^^^^ diff --git a/pandas/core/construction.py b/pandas/core/construction.py index e2d8fba8d4148..c9754ff588896 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -5,6 +5,7 @@ These should not depend on core.internals. 
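 Note that sanitize_array below rejects set inputs outright, since sets
 are unordered and so cannot define element positions.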
""" +from collections import abc from typing import TYPE_CHECKING, Any, Optional, Sequence, Union, cast import numpy as np @@ -446,6 +447,8 @@ def sanitize_array( # GH#16804 arr = np.arange(data.start, data.stop, data.step, dtype="int64") subarr = _try_cast(arr, dtype, copy, raise_cast_failure) + elif isinstance(data, abc.Set): + raise TypeError("Set type is unordered") else: subarr = _try_cast(data, dtype, copy, raise_cast_failure) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index d938c0f6f1066..924952ad334c4 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2604,3 +2604,9 @@ def test_from_2d_ndarray_with_dtype(self): expected = DataFrame(array_dim2).astype("datetime64[ns, UTC]") tm.assert_frame_equal(df, expected) + + def test_construction_from_set_raises(self): + # https://github.com/pandas-dev/pandas/issues/32582 + msg = "Set type is unordered" + with pytest.raises(TypeError, match=msg): + pd.DataFrame({"a": {1, 2, 3}}) From 2b34275316c4db4a9c92662eb8ff405021520829 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Sun, 15 Mar 2020 01:51:18 +0100 Subject: [PATCH 387/388] CLN: Remove redundant tests for .duplicated and .drop_duplicates in tests/base (#32487) --- pandas/conftest.py | 9 ++ pandas/tests/base/test_ops.py | 102 ------------------ pandas/tests/indexes/test_common.py | 81 +++++++++----- .../series/methods/test_drop_duplicates.py | 20 ++++ 4 files changed, 86 insertions(+), 126 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index dcfc523315c8b..d8f96021cdb15 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -425,6 +425,15 @@ def nselect_method(request): return request.param +@pytest.fixture(params=["first", "last", False]) +def keep(request): + """ + Valid values for the 'keep' parameter used in + .duplicated or .drop_duplicates + """ + return request.param + + @pytest.fixture(params=["left", "right", "both", "neither"]) def closed(request): """ diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index 5d666bbae2d6f..5a78e5d6352a9 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -570,108 +570,6 @@ def test_factorize(self, index_or_series_obj, sort): tm.assert_numpy_array_equal(result_codes, expected_codes) tm.assert_index_equal(result_uniques, expected_uniques) - def test_duplicated_drop_duplicates_index(self): - # GH 4060 - for original in self.objs: - if isinstance(original, Index): - - # special case - if original.is_boolean(): - result = original.drop_duplicates() - expected = Index([False, True], name="a") - tm.assert_index_equal(result, expected) - continue - - # original doesn't have duplicates - expected = np.array([False] * len(original), dtype=bool) - duplicated = original.duplicated() - tm.assert_numpy_array_equal(duplicated, expected) - assert duplicated.dtype == bool - result = original.drop_duplicates() - tm.assert_index_equal(result, original) - assert result is not original - - # has_duplicates - assert not original.has_duplicates - - # create repeated values, 3rd and 5th values are duplicated - idx = original[list(range(len(original))) + [5, 3]] - expected = np.array([False] * len(original) + [True, True], dtype=bool) - duplicated = idx.duplicated() - tm.assert_numpy_array_equal(duplicated, expected) - assert duplicated.dtype == bool - tm.assert_index_equal(idx.drop_duplicates(), original) - - base = [False] * len(idx) - base[3] = True - base[5] = True - expected = 
np.array(base) - - duplicated = idx.duplicated(keep="last") - tm.assert_numpy_array_equal(duplicated, expected) - assert duplicated.dtype == bool - result = idx.drop_duplicates(keep="last") - tm.assert_index_equal(result, idx[~expected]) - - base = [False] * len(original) + [True, True] - base[3] = True - base[5] = True - expected = np.array(base) - - duplicated = idx.duplicated(keep=False) - tm.assert_numpy_array_equal(duplicated, expected) - assert duplicated.dtype == bool - result = idx.drop_duplicates(keep=False) - tm.assert_index_equal(result, idx[~expected]) - - with pytest.raises( - TypeError, - match=r"drop_duplicates\(\) got an unexpected keyword argument", - ): - idx.drop_duplicates(inplace=True) - - else: - expected = Series( - [False] * len(original), index=original.index, name="a" - ) - tm.assert_series_equal(original.duplicated(), expected) - result = original.drop_duplicates() - tm.assert_series_equal(result, original) - assert result is not original - - idx = original.index[list(range(len(original))) + [5, 3]] - values = original._values[list(range(len(original))) + [5, 3]] - s = Series(values, index=idx, name="a") - - expected = Series( - [False] * len(original) + [True, True], index=idx, name="a" - ) - tm.assert_series_equal(s.duplicated(), expected) - tm.assert_series_equal(s.drop_duplicates(), original) - - base = [False] * len(idx) - base[3] = True - base[5] = True - expected = Series(base, index=idx, name="a") - - tm.assert_series_equal(s.duplicated(keep="last"), expected) - tm.assert_series_equal( - s.drop_duplicates(keep="last"), s[~np.array(base)] - ) - - base = [False] * len(original) + [True, True] - base[3] = True - base[5] = True - expected = Series(base, index=idx, name="a") - - tm.assert_series_equal(s.duplicated(keep=False), expected) - tm.assert_series_equal( - s.drop_duplicates(keep=False), s[~np.array(base)] - ) - - s.drop_duplicates(inplace=True) - tm.assert_series_equal(s, original) - def test_drop_duplicates_series_vs_dataframe(self): # GH 14192 df = pd.DataFrame( diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index c6ba5c9d61e9e..6f0920c11a6e6 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -302,32 +302,65 @@ def test_pickle(self, indices): assert indices.equals(unpickled) indices.name = original_name - @pytest.mark.parametrize("keep", ["first", "last", False]) - def test_duplicated(self, indices, keep): - if not len(indices) or isinstance(indices, (MultiIndex, RangeIndex)): - # MultiIndex tested separately in: - # tests/indexes/multi/test_unique_and_duplicates - pytest.skip("Skip check for empty Index, MultiIndex, RangeIndex") - + def test_drop_duplicates(self, indices, keep): + if isinstance(indices, MultiIndex): + pytest.skip("MultiIndex is tested separately") + if isinstance(indices, RangeIndex): + pytest.skip( + "RangeIndex is tested in test_drop_duplicates_no_duplicates" + " as it cannot hold duplicates" + ) + if len(indices) == 0: + pytest.skip( + "empty index is tested in test_drop_duplicates_no_duplicates" + " as it cannot hold duplicates" + ) + + # make unique index holder = type(indices) + unique_values = list(set(indices)) + unique_idx = holder(unique_values) + + # make duplicated index + n = len(unique_idx) + duplicated_selection = np.random.choice(n, int(n * 1.5)) + idx = holder(unique_idx.values[duplicated_selection]) + + # Series.duplicated is tested separately + expected_duplicated = ( + pd.Series(duplicated_selection).duplicated(keep=keep).values + 
) + tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected_duplicated) + + # Series.drop_duplicates is tested separately + expected_dropped = holder(pd.Series(idx).drop_duplicates(keep=keep)) + tm.assert_index_equal(idx.drop_duplicates(keep=keep), expected_dropped) + + def test_drop_duplicates_no_duplicates(self, indices): + if isinstance(indices, MultiIndex): + pytest.skip("MultiIndex is tested separately") - idx = holder(indices) - if idx.has_duplicates: - # We are testing the duplicated-method here, so we need to know - # exactly which indices are duplicate and how (for the result). - # This is not possible if "idx" has duplicates already, which we - # therefore remove. This is seemingly circular, as drop_duplicates - # invokes duplicated, but in the end, it all works out because we - # cross-check with Series.duplicated, which is tested separately. - idx = idx.drop_duplicates() - - n, k = len(idx), 10 - duplicated_selection = np.random.choice(n, k * n) - expected = pd.Series(duplicated_selection).duplicated(keep=keep).values - idx = holder(idx.values[duplicated_selection]) - - result = idx.duplicated(keep=keep) - tm.assert_numpy_array_equal(result, expected) + # make unique index + if isinstance(indices, RangeIndex): + # RangeIndex cannot have duplicates + unique_idx = indices + else: + holder = type(indices) + unique_values = list(set(indices)) + unique_idx = holder(unique_values) + + # check on unique index + expected_duplicated = np.array([False] * len(unique_idx), dtype="bool") + tm.assert_numpy_array_equal(unique_idx.duplicated(), expected_duplicated) + result_dropped = unique_idx.drop_duplicates() + tm.assert_index_equal(result_dropped, unique_idx) + # validate shallow copy + assert result_dropped is not unique_idx + + def test_drop_duplicates_inplace(self, indices): + msg = r"drop_duplicates\(\) got an unexpected keyword argument" + with pytest.raises(TypeError, match=msg): + indices.drop_duplicates(inplace=True) def test_has_duplicates(self, indices): holder = type(indices) diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py index 2d052505d5ecc..54f32f979232d 100644 --- a/pandas/tests/series/methods/test_drop_duplicates.py +++ b/pandas/tests/series/methods/test_drop_duplicates.py @@ -44,6 +44,26 @@ def test_drop_duplicates_bool(keep, expected): tm.assert_series_equal(sc, tc[~expected]) +@pytest.mark.parametrize("values", [[], list(range(5))]) +def test_drop_duplicates_no_duplicates(any_numpy_dtype, keep, values): + tc = Series(values, dtype=np.dtype(any_numpy_dtype)) + expected = Series([False] * len(tc), dtype="bool") + + if tc.dtype == "bool": + # 0 -> False and 1-> True + # any other value would be duplicated + tc = tc[:2] + expected = expected[:2] + + tm.assert_series_equal(tc.duplicated(keep=keep), expected) + + result_dropped = tc.drop_duplicates(keep=keep) + tm.assert_series_equal(result_dropped, tc) + + # validate shallow copy + assert result_dropped is not tc + + class TestSeriesDropDuplicates: @pytest.mark.parametrize( "dtype", From 6620dc67ded40c76e466c70027abd012898bbbb1 Mon Sep 17 00:00:00 2001 From: Ari Sosnovsky Date: Sat, 14 Mar 2020 21:09:40 -0400 Subject: [PATCH 388/388] ENH: Add support for dataclasses in the DataFrame constructor (#27999) --- doc/source/user_guide/dsintro.rst | 22 +++++++++++++ pandas/core/dtypes/common.py | 1 + pandas/core/dtypes/inference.py | 36 +++++++++++++++++++++ pandas/core/frame.py | 4 +++ pandas/core/internals/construction.py | 27 ++++++++++++++++ 
 pandas/tests/frame/test_constructors.py | 42 ++++++++++++++++++++++++-
 6 files changed, 131 insertions(+), 1 deletion(-)

diff --git a/doc/source/user_guide/dsintro.rst b/doc/source/user_guide/dsintro.rst
index 200d567a62732..d7f7690f8c3d0 100644
--- a/doc/source/user_guide/dsintro.rst
+++ b/doc/source/user_guide/dsintro.rst
@@ -397,6 +397,28 @@ The result will be a DataFrame with the same index as the input Series, and
 with one column whose name is the original name of the Series (only if no other
 column name provided).
 
+.. _basics.dataframe.from_list_dataclasses:
+
+From a list of dataclasses
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. versionadded:: 1.1.0
+
+Data classes, as introduced in `PEP557 <https://www.python.org/dev/peps/pep-0557>`__,
+can be passed into the DataFrame constructor.
+Passing a list of dataclasses is equivalent to passing a list of dictionaries.
+
+Please be aware that all values in the list should be dataclasses; mixing
+types in the list will result in a TypeError.
+
+.. ipython:: python
+
+   from dataclasses import make_dataclass
+
+   Point = make_dataclass("Point", [("x", int), ("y", int)])
+
+   pd.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)])
+
 **Missing data**
 
 Much more will be said on this topic in the :ref:`Missing data <missing_data>`
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index 1afe7edf2641b..f5997a13e785d 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -24,6 +24,7 @@
     is_array_like,
     is_bool,
     is_complex,
+    is_dataclass,
     is_decimal,
     is_dict_like,
     is_file_like,
diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py
index 56b880dca1241..d1607b5ede6c3 100644
--- a/pandas/core/dtypes/inference.py
+++ b/pandas/core/dtypes/inference.py
@@ -386,3 +386,39 @@ def is_sequence(obj) -> bool:
         return not isinstance(obj, (str, bytes))
     except (TypeError, AttributeError):
         return False
+
+
+def is_dataclass(item):
+    """
+    Check whether the object is a data-class instance.
+
+    Parameters
+    ----------
+    item : object
+
+    Returns
+    -------
+    is_dataclass : bool
+        True if the item is an instance of a data class;
+        False if you pass the data class type itself.
+
+    Examples
+    --------
+    >>> from dataclasses import dataclass
+    >>> @dataclass
+    ... class Point:
+    ...     x: int
+    ...     y: int
+
+    >>> is_dataclass(Point)
+    False
+    >>> is_dataclass(Point(0,2))
+    True
+
+    """
+    try:
+        from dataclasses import is_dataclass
+
+        return is_dataclass(item) and not isinstance(item, type)
+    except ImportError:
+        return False
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 069c99ea8c94b..fc5d071d7a9a9 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -77,6 +77,7 @@
     ensure_platform_int,
     infer_dtype_from_object,
     is_bool_dtype,
+    is_dataclass,
     is_datetime64_any_dtype,
     is_dict_like,
     is_dtype_equal,
@@ -117,6 +118,7 @@
 from pandas.core.internals import BlockManager
 from pandas.core.internals.construction import (
     arrays_to_mgr,
+    dataclasses_to_dicts,
     get_names_from_index,
     init_dict,
     init_ndarray,
@@ -474,6 +476,8 @@ def __init__(
             if not isinstance(data, (abc.Sequence, ExtensionArray)):
                 data = list(data)
             if len(data) > 0:
+                if is_dataclass(data[0]):
+                    data = dataclasses_to_dicts(data)
                 if is_list_like(data[0]) and getattr(data[0], "ndim", 1) == 1:
                     if is_named_tuple(data[0]) and columns is None:
                         columns = data[0]._fields
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index ab363e10eb098..c4416472d451c 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -429,6 +429,33 @@ def _get_axes(N, K, index, columns):
     return index, columns
 
 
+def dataclasses_to_dicts(data):
+    """Convert a list of dataclass instances to a list of dictionaries.
+
+    Parameters
+    ----------
+    data : List[Type[dataclass]]
+
+    Returns
+    -------
+    list_dict : List[dict]
+
+    Examples
+    --------
+    >>> @dataclass
+    ... class Point:
+    ...     x: int
+    ...     y: int
+
+    >>> dataclasses_to_dicts([Point(1,2), Point(2,3)])
+    [{'x': 1, 'y': 2}, {'x': 2, 'y': 3}]
+
+    """
+    from dataclasses import asdict
+
+    return list(map(asdict, data))
+
+
 # ---------------------------------------------------------------------
 # Conversion of Inputs to Arrays
 
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 924952ad334c4..95f812a99c579 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -9,7 +9,7 @@
 import pytest
 import pytz
 
-from pandas.compat import is_platform_little_endian
+from pandas.compat import PY37, is_platform_little_endian
 from pandas.compat.numpy import _is_numpy_dev
 
 from pandas.core.dtypes.common import is_integer_dtype
@@ -1364,6 +1364,46 @@ def test_constructor_list_of_namedtuples(self):
         result = DataFrame(tuples, columns=["y", "z"])
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.skipif(not PY37, reason="Requires Python >= 3.7")
+    def test_constructor_list_of_dataclasses(self):
+        # GH21910
+        from dataclasses import make_dataclass
+
+        Point = make_dataclass("Point", [("x", int), ("y", int)])
+
+        datas = [Point(0, 3), Point(1, 3)]
+        expected = DataFrame({"x": [0, 1], "y": [3, 3]})
+        result = DataFrame(datas)
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.skipif(not PY37, reason="Requires Python >= 3.7")
+    def test_constructor_list_of_dataclasses_with_varying_types(self):
+        # GH21910
+        from dataclasses import make_dataclass
+
+        # varying types
+        Point = make_dataclass("Point", [("x", int), ("y", int)])
+        HLine = make_dataclass("HLine", [("x0", int), ("x1", int), ("y", int)])
+
+        datas = [Point(0, 3), HLine(1, 3, 3)]
+
+        expected = DataFrame(
+            {"x": [0, np.nan], "y": [3, 3], "x0": [np.nan, 1], "x1": [np.nan, 3]}
+        )
+        result = DataFrame(datas)
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.skipif(not
PY37, reason="Requires Python >= 3.7") + def test_constructor_list_of_dataclasses_error_thrown(self): + # GH21910 + from dataclasses import make_dataclass + + Point = make_dataclass("Point", [("x", int), ("y", int)]) + + # expect TypeError + with pytest.raises(TypeError): + DataFrame([Point(0, 0), {"x": 1, "y": 0}]) + def test_constructor_list_of_dict_order(self): # GH10056 data = [