Skip to content

Commit

Permalink
TST: add method/dtype coverage to str-accessor; precursor to pandas-d…
Browse files Browse the repository at this point in the history
  • Loading branch information
h-vetinari authored and Pingviinituutti committed Feb 28, 2019
1 parent 64bd621 commit 2bab7bf
Show file tree
Hide file tree
Showing 3 changed files with 235 additions and 7 deletions.
82 changes: 81 additions & 1 deletion pandas/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from datetime import date, time, timedelta
from decimal import Decimal
import importlib
import os

Expand All @@ -8,7 +10,7 @@
import pytest
from pytz import FixedOffset, utc

from pandas.compat import PY3
from pandas.compat import PY3, u
import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -514,6 +516,84 @@ def any_numpy_dtype(request):
return request.param


# categoricals are handled separately
#
# Table of (expected inferred dtype, sample values) pairs used to parametrize
# the `any_skipna_inferred_dtype` fixture.  Every non-empty sample carries a
# missing value in the middle (np.nan, or pd.NaT for 'period'), because the
# labels are the ones expected from inference with skipna=True.
# The labels of the unicode and bytes samples depend on the Python major
# version (PY3), hence the conditional expressions.
_any_skipna_inferred_dtype = [
    ('string', ['a', np.nan, 'c']),
    ('unicode' if not PY3 else 'string', [u('a'), np.nan, u('c')]),
    ('bytes' if PY3 else 'string', [b'a', np.nan, b'c']),
    # both an all-NaN and a zero-length input are labelled 'empty'
    ('empty', [np.nan, np.nan, np.nan]),
    ('empty', []),
    ('mixed-integer', ['a', np.nan, 2]),
    ('mixed', ['a', np.nan, 2.0]),
    ('floating', [1.0, np.nan, 2.0]),
    ('integer', [1, np.nan, 2]),
    ('mixed-integer-float', [1, np.nan, 2.0]),
    ('decimal', [Decimal(1), np.nan, Decimal(2)]),
    ('boolean', [True, np.nan, False]),
    ('datetime64', [np.datetime64('2013-01-01'), np.nan,
                    np.datetime64('2018-01-01')]),
    ('datetime', [pd.Timestamp('20130101'), np.nan, pd.Timestamp('20180101')]),
    ('date', [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
    # The following two dtypes are commented out due to GH 23554
    # ('complex', [1 + 1j, np.nan, 2 + 2j]),
    # ('timedelta64', [np.timedelta64(1, 'D'),
    #                  np.nan, np.timedelta64(2, 'D')]),
    ('timedelta', [timedelta(1), np.nan, timedelta(2)]),
    ('time', [time(1), np.nan, time(2)]),
    ('period', [pd.Period(2013), pd.NaT, pd.Period(2018)]),
    ('interval', [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)])]
# split the labels off the table; the sample values (bound to `_`) are unused
ids, _ = zip(*_any_skipna_inferred_dtype)  # use inferred type as fixture-id


@pytest.fixture(params=_any_skipna_inferred_dtype, ids=ids)
def any_skipna_inferred_dtype(request):
    """
    Parametrized fixture pairing an inferred dtype with matching sample values.

    One test instance is generated per entry of `_any_skipna_inferred_dtype`;
    the label of the entry doubles as the fixture id.

    The covered (inferred) types are:

    * 'string'
    * 'unicode' (if PY2)
    * 'empty'
    * 'bytes' (if PY3)
    * 'mixed'
    * 'mixed-integer'
    * 'mixed-integer-float'
    * 'floating'
    * 'integer'
    * 'decimal'
    * 'boolean'
    * 'datetime64'
    * 'datetime'
    * 'date'
    * 'timedelta'
    * 'time'
    * 'period'
    * 'interval'

    Returns
    -------
    inferred_dtype : str
        The string for the inferred dtype from _libs.lib.infer_dtype
    values : np.ndarray
        An array of object dtype that will be inferred to have
        `inferred_dtype`

    Examples
    --------
    >>> import pandas._libs.lib as lib
    >>>
    >>> def test_something(any_skipna_inferred_dtype):
    ...     inferred_dtype, values = any_skipna_inferred_dtype
    ...     # will pass
    ...     assert lib.infer_dtype(values, skipna=True) == inferred_dtype
    """
    label, samples = request.param

    # Wrap the samples in an object array so that numpy does not cast them.
    # That the label matches the actual inference result is asserted in
    # tests/dtypes/test_inference.py, not here.
    return label, np.array(samples, dtype=object)


@pytest.fixture
def mock():
"""
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,13 @@ class TestTypeInference(object):
class Dummy():
pass

def test_inferred_dtype_fixture(self, any_skipna_inferred_dtype):
    # guards the fixture defined in pandas/conftest.py: the label it ships
    # with each sample must be what the inference actually reports
    expected_label, samples = any_skipna_inferred_dtype

    actual_label = lib.infer_dtype(samples, skipna=True)
    assert expected_label == actual_label

def test_length_zero(self):
result = lib.infer_dtype(np.array([], dtype='i4'))
assert result == 'integer'
Expand Down
153 changes: 147 additions & 6 deletions pandas/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import numpy as np
from numpy.random import randint

from pandas.compat import range, u
from pandas.compat import range, u, PY3
import pandas.compat as compat
from pandas import Index, Series, DataFrame, isna, MultiIndex, notna, concat

Expand Down Expand Up @@ -118,6 +118,55 @@ def any_string_method(request):
return request.param


# subset of the full set from pandas/conftest.py
#
# Table of (expected inferred dtype, sample values) pairs used to parametrize
# the `any_allowed_skipna_inferred_dtype` fixture.  The labels of the unicode
# and bytes samples depend on the Python major version (PY3).
# NOTE: there is no entry labelled 'mixed' in this table; the only mixed
# sample is 'mixed-integer'.
_any_allowed_skipna_inferred_dtype = [
    ('string', ['a', np.nan, 'c']),
    ('unicode' if not PY3 else 'string', [u('a'), np.nan, u('c')]),
    ('bytes' if PY3 else 'string', [b'a', np.nan, b'c']),
    # both an all-NaN and a zero-length input are labelled 'empty'
    ('empty', [np.nan, np.nan, np.nan]),
    ('empty', []),
    ('mixed-integer', ['a', np.nan, 2])
]
# split the labels off the table; the sample values (bound to `_`) are unused
ids, _ = zip(*_any_allowed_skipna_inferred_dtype)  # use inferred type as id


@pytest.fixture(params=_any_allowed_skipna_inferred_dtype, ids=ids)
def any_allowed_skipna_inferred_dtype(request):
    """
    Fixture for (inferred) dtypes allowed in StringMethods.__init__

    One test instance is generated per entry of
    `_any_allowed_skipna_inferred_dtype`; the label of the entry doubles as
    the fixture id.

    The covered (inferred) types are:

    * 'string'
    * 'unicode' (if PY2)
    * 'empty'
    * 'bytes' (if PY3)
    * 'mixed-integer'

    Returns
    -------
    inferred_dtype : str
        The string for the inferred dtype from _libs.lib.infer_dtype
    values : np.ndarray
        An array of object dtype that will be inferred to have
        `inferred_dtype`

    Examples
    --------
    >>> import pandas._libs.lib as lib
    >>>
    >>> def test_something(any_allowed_skipna_inferred_dtype):
    ...     inferred_dtype, values = any_allowed_skipna_inferred_dtype
    ...     # will pass
    ...     assert lib.infer_dtype(values, skipna=True) == inferred_dtype
    """
    inferred_dtype, values = request.param
    values = np.array(values, dtype=object)  # object dtype to avoid casting

    # correctness of inference tested in tests/dtypes/test_inference.py
    return inferred_dtype, values


class TestStringMethods(object):

def test_api(self):
Expand All @@ -126,11 +175,103 @@ def test_api(self):
assert Series.str is strings.StringMethods
assert isinstance(Series(['']).str, strings.StringMethods)

# GH 9184
invalid = Series([1])
with pytest.raises(AttributeError, match="only use .str accessor"):
invalid.str
assert not hasattr(invalid, 'str')
@pytest.mark.parametrize('dtype', [object, 'category'])
@pytest.mark.parametrize('box', [Series, Index])
def test_api_per_dtype(self, box, dtype, any_skipna_inferred_dtype):
    # Checks, for every inferred dtype of the fixture, that accessing `.str`
    # on a Series/Index either yields a StringMethods object or raises
    # AttributeError.

    # the fixture hands over one (label, sample values) pair
    inferred_dtype, values = any_skipna_inferred_dtype

    # pass the dtype explicitly so that the constructor does not cast
    t = box(values, dtype=dtype)

    # inferred types for which `.str` is expected to be constructible
    types_passing_constructor = ['string', 'unicode', 'empty',
                                 'bytes', 'mixed', 'mixed-integer']

    is_index = box == Index
    is_series = box == Series
    is_object = dtype == object
    is_categorical = dtype == 'category'

    # TODO: get rid of these xfails
    if is_categorical and inferred_dtype in ['period', 'interval']:
        pytest.xfail(reason='Conversion to numpy array fails because '
                     'the ._values-attribute is not a numpy array for '
                     'PeriodArray/IntervalArray; see GH 23553')
    if is_index and inferred_dtype in ['empty', 'bytes']:
        pytest.xfail(reason='Raising too restrictively; '
                     'solved by GH 23167')
    if is_index and is_object and inferred_dtype in ['boolean', 'date',
                                                     'time']:
        pytest.xfail(reason='Inferring incorrectly because of NaNs; '
                     'solved by GH 23167')

    # NOTE(review): the two conditions below are OR-ed, so the categorical
    # one is independent of `box` and applies to Index as well as Series;
    # confirm that this is intended.
    series_object_rejected = (
        is_series and is_object
        and inferred_dtype not in types_passing_constructor)
    categorical_not_raising = (
        is_categorical
        and inferred_dtype in ['decimal', 'boolean', 'time'])
    if series_object_rejected or categorical_not_raising:
        pytest.xfail(reason='Not raising correctly; solved by GH 23167')

    if inferred_dtype not in types_passing_constructor:
        # GH 9184, GH 23011, GH 23163
        with pytest.raises(AttributeError, match='Can only use .str '
                           'accessor with string values.*'):
            t.str
        assert not hasattr(t, 'str')
    else:
        # GH 6106
        assert isinstance(t.str, strings.StringMethods)

@pytest.mark.parametrize('dtype', [object, 'category'])
@pytest.mark.parametrize('box', [Series, Index])
def test_api_per_method(self, box, dtype,
                        any_allowed_skipna_inferred_dtype,
                        any_string_method):
    # this test does not check correctness of the different methods,
    # just that the methods work on the specified (inferred) dtypes,
    # and raise on all others

    # one instance of each parametrized fixture
    inferred_dtype, values = any_allowed_skipna_inferred_dtype
    method_name, args, kwargs = any_string_method

    # only these methods are expected to work on bytes
    bytes_allowed = method_name in ['encode', 'decode', 'len']
    # as of v0.23.4, all methods except 'cat' are very lenient with the
    # allowed data types, just returning NaN for entries that error.
    # This could be changed with an 'errors'-kwarg to the `str`-accessor,
    # see discussion in GH 13877
    mixed_allowed = method_name not in ['cat']

    # TODO: get rid of these xfails
    # (no xfail is needed for 'boolean'/'date'/'time': the fixture used here
    # only yields inferred types that the accessor accepts)
    if not bytes_allowed and inferred_dtype == 'bytes':
        pytest.xfail(reason='Not raising for "bytes", see GH 23011; '
                     'Also: malformed method names, see GH 23551; '
                     'solved by GH 23167')
    if not mixed_allowed and inferred_dtype in ['mixed', 'mixed-integer']:
        pytest.xfail(reason='Bad error message; should raise better; '
                     'solved by GH 23167')
    if box == Index and inferred_dtype in ['empty', 'bytes']:
        pytest.xfail(reason='Raising too restrictively; '
                     'solved by GH 23167')
    if box == Index and dtype == 'category':
        pytest.xfail(reason='Broken methods on CategoricalIndex; '
                     'see GH 23556')

    t = box(values, dtype=dtype)  # explicit dtype to avoid casting
    method = getattr(t.str, method_name)

    # inferred types on which this particular method is expected to work
    allowed_types = ['string', 'unicode', 'empty']
    if bytes_allowed:
        allowed_types.append('bytes')
    if mixed_allowed:
        allowed_types.extend(['mixed', 'mixed-integer'])

    if inferred_dtype in allowed_types:
        method(*args, **kwargs)  # works!
    else:
        # GH 23011, GH 23163
        msg = ('Cannot use .str.{name} with values of inferred dtype '
               '{inferred_dtype!r}.'.format(name=method_name,
                                            inferred_dtype=inferred_dtype))
        with pytest.raises(TypeError, match=msg):
            method(*args, **kwargs)

def test_api_for_categorical(self, any_string_method):
# https://github.com/pandas-dev/pandas/issues/10661
Expand Down

0 comments on commit 2bab7bf

Please sign in to comment.