TST: split out some sparse tests (pandas-dev#18968)
jreback authored Dec 28, 2017
1 parent dbec3c9 commit e1d5a27
Showing 17 changed files with 302 additions and 240 deletions.
6 changes: 3 additions & 3 deletions ci/install_travis.sh
@@ -178,15 +178,15 @@ if [ "$PIP_BUILD_TEST" ]; then

# build & install testing
echo "[building release]"
-bash scripts/build_dist_for_release.sh
+time bash scripts/build_dist_for_release.sh || exit 1
conda uninstall -y cython
-time pip install dist/*tar.gz --quiet || exit 1
+time pip install dist/*tar.gz || exit 1

elif [ "$CONDA_BUILD_TEST" ]; then

# build & install testing
echo "[building conda recipe]"
-conda build ./conda.recipe --numpy 1.13 --python 3.5 -q --no-test
+time conda build ./conda.recipe --numpy 1.13 --python 3.5 -q --no-test

echo "[installing]"
conda install pandas --use-local
6 changes: 3 additions & 3 deletions pandas/core/dtypes/dtypes.py
@@ -402,7 +402,7 @@ class DatetimeTZDtype(ExtensionDtype):
num = 101
base = np.dtype('M8[ns]')
_metadata = ['unit', 'tz']
_match = re.compile("(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
_match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
_cache = {}

def __new__(cls, unit=None, tz=None):
@@ -514,7 +514,7 @@ class PeriodDtype(ExtensionDtype):
base = np.dtype('O')
num = 102
_metadata = ['freq']
_match = re.compile("(P|p)eriod\[(?P<freq>.+)\]")
_match = re.compile(r"(P|p)eriod\[(?P<freq>.+)\]")
_cache = {}

def __new__(cls, freq=None):
@@ -632,7 +632,7 @@ class IntervalDtype(ExtensionDtype):
base = np.dtype('O')
num = 103
_metadata = ['subtype']
_match = re.compile("(I|i)nterval\[(?P<subtype>.+)\]")
_match = re.compile(r"(I|i)nterval\[(?P<subtype>.+)\]")
_cache = {}

def __new__(cls, subtype=None):
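
Most hunks in this commit are the same mechanical fix: regex literals gain an r prefix so that sequences like \[ and \d, which are not valid Python string escapes, stop raising DeprecationWarning on Python 3.6+. A minimal standalone sketch (not pandas code) of why the compiled pattern is unchanged:

    import re

    # '\[' is not a recognized escape: older Pythons kept the backslash
    # silently, 3.6+ warns. The raw literal spells out exactly the same
    # characters, warning-free.
    assert "(P|p)eriod\\[(?P<freq>.+)\\]" == r"(P|p)eriod\[(?P<freq>.+)\]"

    pat = re.compile(r"(P|p)eriod\[(?P<freq>.+)\]")
    assert pat.match("period[D]").group("freq") == "D"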
2 changes: 1 addition & 1 deletion pandas/core/frame.py
@@ -2635,7 +2635,7 @@ def insert(self, loc, column, value, allow_duplicates=False):
allow_duplicates=allow_duplicates)

def assign(self, **kwargs):
"""
r"""
Assign new columns to a DataFrame, returning a new object
(a copy) with all the original columns in addition to the new ones.
24 changes: 12 additions & 12 deletions pandas/core/strings.py
@@ -306,7 +306,7 @@ def str_endswith(arr, pat, na=np.nan):


def str_replace(arr, pat, repl, n=-1, case=None, flags=0):
"""
r"""
Replace occurrences of pattern/regex in the Series/Index with
some other string. Equivalent to :meth:`str.replace` or
:func:`re.sub`.
@@ -598,7 +598,7 @@ def _str_extract_frame(arr, pat, flags=0):


def str_extract(arr, pat, flags=0, expand=None):
"""
r"""
For each subject string in the Series, extract groups from the
first match of regular expression pat.
@@ -635,23 +635,23 @@ def str_extract(arr, pat, flags=0, expand=None):
Non-matches will be NaN.
>>> s = Series(['a1', 'b2', 'c3'])
->>> s.str.extract('([ab])(\d)')
+>>> s.str.extract(r'([ab])(\d)')
0 1
0 a 1
1 b 2
2 NaN NaN
A pattern may contain optional groups.
->>> s.str.extract('([ab])?(\d)')
+>>> s.str.extract(r'([ab])?(\d)')
0 1
0 a 1
1 b 2
2 NaN 3
Named groups will become column names in the result.
->>> s.str.extract('(?P<letter>[ab])(?P<digit>\d)')
+>>> s.str.extract(r'(?P<letter>[ab])(?P<digit>\d)')
letter digit
0 a 1
1 b 2
@@ -660,15 +660,15 @@ def str_extract(arr, pat, flags=0, expand=None):
A pattern with one group will return a DataFrame with one column
if expand=True.
->>> s.str.extract('[ab](\d)', expand=True)
+>>> s.str.extract(r'[ab](\d)', expand=True)
0
0 1
1 2
2 NaN
A pattern with one group will return a Series if expand=False.
->>> s.str.extract('[ab](\d)', expand=False)
+>>> s.str.extract(r'[ab](\d)', expand=False)
0 1
1 2
2 NaN
@@ -694,7 +694,7 @@ def str_extract(arr, pat, flags=0, expand=None):


def str_extractall(arr, pat, flags=0):
"""
r"""
For each subject string in the Series, extract groups from all
matches of regular expression pat. When each subject string in the
Series has exactly one match, extractall(pat).xs(0, level='match')
@@ -728,7 +728,7 @@ def str_extractall(arr, pat, flags=0):
Indices with no matches will not appear in the result.
>>> s = Series(["a1a2", "b1", "c1"], index=["A", "B", "C"])
>>> s.str.extractall("[ab](\d)")
>>> s.str.extractall(r"[ab](\d)")
0
match
A 0 1
@@ -737,7 +737,7 @@ def str_extractall(arr, pat, flags=0):
Capture group names are used for column names of the result.
>>> s.str.extractall("[ab](?P<digit>\d)")
>>> s.str.extractall(r"[ab](?P<digit>\d)")
digit
match
A 0 1
@@ -746,7 +746,7 @@ def str_extractall(arr, pat, flags=0):
A pattern with two groups will return a DataFrame with two columns.
>>> s.str.extractall("(?P<letter>[ab])(?P<digit>\d)")
>>> s.str.extractall(r"(?P<letter>[ab])(?P<digit>\d)")
letter digit
match
A 0 a 1
@@ -755,7 +755,7 @@ def str_extractall(arr, pat, flags=0):
Optional groups that do not match are NaN in the result.
>>> s.str.extractall("(?P<letter>[ab])?(?P<digit>\d)")
>>> s.str.extractall(r"(?P<letter>[ab])?(?P<digit>\d)")
letter digit
match
A 0 a 1
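
The docstring hunks here and in frame.py above are the same fix seen from another angle: a docstring is an ordinary string literal, so regex examples like ([ab])(\d) inside it need the r""" prefix (or doubled backslashes) to stay warning-free. A small standalone illustration with a hypothetical function:

    def extract_demo():
        r"""Show a regex in a docstring without escape warnings.

        The pattern ([ab])(\d) appears verbatim because the literal is raw.
        """

    # the backslash survives either way; raw just avoids the 3.6+ warning
    assert "\\d" in extract_demo.__doc__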
8 changes: 4 additions & 4 deletions pandas/io/clipboards.py
@@ -3,7 +3,7 @@
from pandas.compat import StringIO, PY2


-def read_clipboard(sep='\s+', **kwargs): # pragma: no cover
+def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover
r"""
Read text from clipboard and pass to read_table. See read_table for the
full argument list
@@ -55,10 +55,10 @@ def read_clipboard(sep='\s+', **kwargs): # pragma: no cover

counts = {x.lstrip().count('\t') for x in lines}
if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
-sep = '\t'
+sep = r'\t'

if sep is None and kwargs.get('delim_whitespace') is None:
-sep = '\s+'
+sep = r'\s+'

return read_table(StringIO(text), sep=sep, **kwargs)

@@ -99,7 +99,7 @@ def to_clipboard(obj, excel=None, sep=None, **kwargs): # pragma: no cover
if excel:
try:
if sep is None:
-sep = '\t'
+sep = r'\t'
buf = StringIO()
# clipboard_set (pyperclip) expects unicode
obj.to_csv(buf, sep=sep, encoding='utf-8', **kwargs)
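
One subtlety in this file: unlike r'\s+', which is byte-for-byte the same two characters as before, r'\t' is not the same value as '\t'; it is a two-character sequence (backslash, t) rather than a single tab. Per the parsers.py hunk below, multi-character separators other than '\s+' are interpreted as regular expressions, and the regex \t does match a literal tab, so parsing via read_table should behave the same, but the distinction is easy to miss:

    import re

    assert len('\t') == 1      # a real tab character
    assert len(r'\t') == 2     # backslash followed by 't'
    assert r'\t' != '\t'

    # as a regex, r'\t' still matches the tab
    assert re.match(r'\t', '\t') is not None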
6 changes: 3 additions & 3 deletions pandas/io/formats/format.py
@@ -1002,7 +1002,7 @@ def get_col_type(dtype):
buf.write('\\end{longtable}\n')

def _format_multicolumn(self, row, ilevels):
"""
r"""
Combine columns belonging to a group to a single multicolumn entry
according to self.multicolumn_format
@@ -1040,7 +1040,7 @@ def append_col():
return row2

def _format_multirow(self, row, ilevels, i, rows):
"""
r"""
Check following rows, whether row should be a multirow
e.g.: becomes:
@@ -1071,7 +1071,7 @@ def _print_cline(self, buf, i, icol):
"""
for cl in self.clinebuf:
if cl[0] == i:
-buf.write('\cline{{{cl:d}-{icol:d}}}\n'
+buf.write('\\cline{{{cl:d}-{icol:d}}}\n'
.format(cl=cl[1], icol=icol))
# remove entries that have been written to buffer
self.clinebuf = [x for x in self.clinebuf if x[0] != i]
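
The \cline change takes the doubled-backslash route instead of a raw string for a reason: the same literal ends in \n, which must remain a real newline; in a raw string it would turn into a literal backslash plus n. A standalone check of the formatting:

    # '\\cline' yields one backslash for LaTeX; '\n' stays a newline
    s = '\\cline{{{cl:d}-{icol:d}}}\n'.format(cl=1, icol=3)
    assert s == '\\cline{1-3}\n'
    assert s[-1] == '\n'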
8 changes: 4 additions & 4 deletions pandas/io/parsers.py
@@ -51,7 +51,7 @@
# so we need to remove it if we see it.
_BOM = u('\ufeff')

_parser_params = """Also supports optionally iterating or breaking of the file
_parser_params = r"""Also supports optionally iterating or breaking of the file
into chunks.
Additional help can be found in the `online docs for IO Tools
@@ -842,19 +842,19 @@ def _clean_options(self, options, engine):
" sep=None with delim_whitespace=False"
engine = 'python'
elif sep is not None and len(sep) > 1:
-if engine == 'c' and sep == '\s+':
+if engine == 'c' and sep == r'\s+':
result['delim_whitespace'] = True
del result['delimiter']
elif engine not in ('python', 'python-fwf'):
# wait until regex engine integrated
fallback_reason = "the 'c' engine does not support"\
" regex separators (separators > 1 char and"\
" different from '\s+' are"\
r" different from '\s+' are"\
" interpreted as regex)"
engine = 'python'
elif delim_whitespace:
if 'python' in engine:
-result['delimiter'] = '\s+'
+result['delimiter'] = r'\s+'
elif sep is not None:
encodeable = True
try:
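
The _clean_options logic above gives sep=r'\s+' special treatment: on the default 'c' engine it is rewritten as delim_whitespace=True, while any other multi-character separator forces a fallback to the 'python' engine, where it is treated as a regex. A usage sketch (hypothetical data, standard read_csv API):

    import pandas as pd
    from io import StringIO

    data = "a b\n1  2\n3\t4\n"

    # special-cased: stays on the fast C engine as delim_whitespace=True
    df = pd.read_csv(StringIO(data), sep=r'\s+')

    # any other regex separator needs (or falls back to) the python engine
    df2 = pd.read_csv(StringIO(data), sep=r'[ \t]+', engine='python')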
4 changes: 2 additions & 2 deletions pandas/io/pytables.py
@@ -1792,7 +1792,7 @@ def create_for_block(
# name values_0
try:
if version[0] == 0 and version[1] <= 10 and version[2] == 0:
m = re.search("values_block_(\d+)", name)
m = re.search(r"values_block_(\d+)", name)
if m:
name = "values_%s" % m.groups()[0]
except:
@@ -4297,7 +4297,7 @@ class AppendableMultiFrameTable(AppendableFrameTable):
table_type = u('appendable_multiframe')
obj_type = DataFrame
ndim = 2
-_re_levels = re.compile("^level_\d+$")
+_re_levels = re.compile(r"^level_\d+$")

@property
def table_type_short(self):
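
For context, the values_block pattern above backs a legacy rename (guarded by the version check in the hunk); its effect is easy to verify standalone:

    import re

    # legacy files named columns values_block_<n>; the search extracts <n>
    # so the name can be rewritten as values_<n>
    m = re.search(r"values_block_(\d+)", "values_block_0")
    assert "values_%s" % m.groups()[0] == "values_0"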
2 changes: 1 addition & 1 deletion pandas/io/sql.py
@@ -1306,7 +1306,7 @@ def _create_table_setup(self):
column_names_and_types = \
self._get_column_names_and_types(self._sql_type_name)

-pat = re.compile('\s+')
+pat = re.compile(r'\s+')
column_names = [col_name for col_name, _, _ in column_names_and_types]
if any(map(pat.search, column_names)):
warnings.warn(_SAFE_NAMES_WARNING, stacklevel=6)
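
The compiled \s+ pattern is used with search, so whitespace anywhere in a column name triggers _SAFE_NAMES_WARNING; a quick standalone check of that logic:

    import re

    pat = re.compile(r'\s+')
    assert pat.search('col name') is not None   # would trigger the warning
    assert pat.search('col_name') is None       # safe name, no warning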
6 changes: 3 additions & 3 deletions pandas/tests/indexes/interval/test_interval.py
@@ -599,7 +599,7 @@ def test_monotonic(self, closed):
assert idx.is_monotonic_decreasing
assert idx._is_strictly_monotonic_decreasing

-@pytest.mark.xfail(reason='not a valid repr as we use interval notation')
+@pytest.mark.skip(reason='not a valid repr as we use interval notation')
def test_repr(self):
i = IntervalIndex.from_tuples([(0, 1), (1, 2)], closed='right')
expected = ("IntervalIndex(left=[0, 1],"
@@ -619,11 +619,11 @@ def test_repr(self):
"\n dtype='interval[datetime64[ns]]')")
assert repr(i) == expected

-@pytest.mark.xfail(reason='not a valid repr as we use interval notation')
+@pytest.mark.skip(reason='not a valid repr as we use interval notation')
def test_repr_max_seq_item_setting(self):
super(TestIntervalIndex, self).test_repr_max_seq_item_setting()

-@pytest.mark.xfail(reason='not a valid repr as we use interval notation')
+@pytest.mark.skip(reason='not a valid repr as we use interval notation')
def test_repr_roundtrip(self):
super(TestIntervalIndex, self).test_repr_roundtrip()

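
The repr tests switch from xfail to skip, which is the stronger marker: an xfail test still executes (reporting XFAIL on failure, XPASS if it unexpectedly passes), while a skipped test never runs at all. Since these reprs can never be valid under interval notation, there is nothing useful to execute. A standalone illustration of the difference:

    import pytest

    @pytest.mark.xfail(reason='expected to fail, but still executed')
    def test_runs_and_reports_xfail():
        assert False

    @pytest.mark.skip(reason='never executed at all')
    def test_never_runs():
        assert False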
Empty file.
40 changes: 40 additions & 0 deletions pandas/tests/sparse/frame/test_analytics.py
@@ -0,0 +1,40 @@
import pytest
import numpy as np
from pandas import SparseDataFrame, DataFrame, SparseSeries
from pandas.util import testing as tm


@pytest.mark.xfail(reason='Wrong SparseBlock initialization '
                          '(GH 17386)')
def test_quantile():
    # GH 17386
    data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
    q = 0.1

    sparse_df = SparseDataFrame(data)
    result = sparse_df.quantile(q)

    dense_df = DataFrame(data)
    dense_expected = dense_df.quantile(q)
    sparse_expected = SparseSeries(dense_expected)

    tm.assert_series_equal(result, dense_expected)
    tm.assert_sp_series_equal(result, sparse_expected)


@pytest.mark.xfail(reason='Wrong SparseBlock initialization '
                          '(GH 17386)')
def test_quantile_multi():
    # GH 17386
    data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
    q = [0.1, 0.5]

    sparse_df = SparseDataFrame(data)
    result = sparse_df.quantile(q)

    dense_df = DataFrame(data)
    dense_expected = dense_df.quantile(q)
    sparse_expected = SparseDataFrame(dense_expected)

    tm.assert_frame_equal(result, dense_expected)
    tm.assert_sp_frame_equal(result, sparse_expected)