TST: split out some sparse tests (pandas-dev#18968)
jreback authored Dec 28, 2017
1 parent dbec3c9 commit e1d5a27
Showing 17 changed files with 302 additions and 240 deletions.
6 changes: 3 additions & 3 deletions ci/install_travis.sh
@@ -178,15 +178,15 @@ if [ "$PIP_BUILD_TEST" ]; then

# build & install testing
echo "[building release]"
-bash scripts/build_dist_for_release.sh
+time bash scripts/build_dist_for_release.sh || exit 1
conda uninstall -y cython
-time pip install dist/*tar.gz --quiet || exit 1
+time pip install dist/*tar.gz || exit 1

elif [ "$CONDA_BUILD_TEST" ]; then

# build & install testing
echo "[building conda recipe]"
-conda build ./conda.recipe --numpy 1.13 --python 3.5 -q --no-test
+time conda build ./conda.recipe --numpy 1.13 --python 3.5 -q --no-test

echo "[installing]"
conda install pandas --use-local
6 changes: 3 additions & 3 deletions pandas/core/dtypes/dtypes.py
@@ -402,7 +402,7 @@ class DatetimeTZDtype(ExtensionDtype):
num = 101
base = np.dtype('M8[ns]')
_metadata = ['unit', 'tz']
_match = re.compile("(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
_match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
_cache = {}

def __new__(cls, unit=None, tz=None):
@@ -514,7 +514,7 @@ class PeriodDtype(ExtensionDtype):
base = np.dtype('O')
num = 102
_metadata = ['freq']
_match = re.compile("(P|p)eriod\[(?P<freq>.+)\]")
_match = re.compile(r"(P|p)eriod\[(?P<freq>.+)\]")
_cache = {}

def __new__(cls, freq=None):
@@ -632,7 +632,7 @@ class IntervalDtype(ExtensionDtype):
base = np.dtype('O')
num = 103
_metadata = ['subtype']
_match = re.compile("(I|i)nterval\[(?P<subtype>.+)\]")
_match = re.compile(r"(I|i)nterval\[(?P<subtype>.+)\]")
_cache = {}

def __new__(cls, subtype=None):
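
Most hunks in this commit are the same mechanical fix: regex literals gain an r prefix so that sequences like \[ and \d, which are not valid Python string escapes, stop raising DeprecationWarning on Python 3.6+. A minimal standalone sketch (not pandas code) of why the compiled pattern is unchanged:

    import re

    # '\[' is not a recognized escape: older Pythons kept the backslash
    # silently, 3.6+ warns. The raw literal spells out exactly the same
    # characters, warning-free.
    assert "(P|p)eriod\\[(?P<freq>.+)\\]" == r"(P|p)eriod\[(?P<freq>.+)\]"

    pat = re.compile(r"(P|p)eriod\[(?P<freq>.+)\]")
    assert pat.match("period[D]").group("freq") == "D"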
2 changes: 1 addition & 1 deletion pandas/core/frame.py
@@ -2635,7 +2635,7 @@ def insert(self, loc, column, value, allow_duplicates=False):
allow_duplicates=allow_duplicates)

def assign(self, **kwargs):
"""
r"""
Assign new columns to a DataFrame, returning a new object
(a copy) with all the original columns in addition to the new ones.
24 changes: 12 additions & 12 deletions pandas/core/strings.py
@@ -306,7 +306,7 @@ def str_endswith(arr, pat, na=np.nan):


def str_replace(arr, pat, repl, n=-1, case=None, flags=0):
"""
r"""
Replace occurrences of pattern/regex in the Series/Index with
some other string. Equivalent to :meth:`str.replace` or
:func:`re.sub`.
@@ -598,7 +598,7 @@ def _str_extract_frame(arr, pat, flags=0):


def str_extract(arr, pat, flags=0, expand=None):
"""
r"""
For each subject string in the Series, extract groups from the
first match of regular expression pat.
@@ -635,23 +635,23 @@ def str_extract(arr, pat, flags=0, expand=None):
Non-matches will be NaN.
>>> s = Series(['a1', 'b2', 'c3'])
->>> s.str.extract('([ab])(\d)')
+>>> s.str.extract(r'([ab])(\d)')
0 1
0 a 1
1 b 2
2 NaN NaN
A pattern may contain optional groups.
->>> s.str.extract('([ab])?(\d)')
+>>> s.str.extract(r'([ab])?(\d)')
0 1
0 a 1
1 b 2
2 NaN 3
Named groups will become column names in the result.
->>> s.str.extract('(?P<letter>[ab])(?P<digit>\d)')
+>>> s.str.extract(r'(?P<letter>[ab])(?P<digit>\d)')
letter digit
0 a 1
1 b 2
@@ -660,15 +660,15 @@ def str_extract(arr, pat, flags=0, expand=None):
A pattern with one group will return a DataFrame with one column
if expand=True.
->>> s.str.extract('[ab](\d)', expand=True)
+>>> s.str.extract(r'[ab](\d)', expand=True)
0
0 1
1 2
2 NaN
A pattern with one group will return a Series if expand=False.
->>> s.str.extract('[ab](\d)', expand=False)
+>>> s.str.extract(r'[ab](\d)', expand=False)
0 1
1 2
2 NaN
@@ -694,7 +694,7 @@ def str_extract(arr, pat, flags=0, expand=None):


def str_extractall(arr, pat, flags=0):
"""
r"""
For each subject string in the Series, extract groups from all
matches of regular expression pat. When each subject string in the
Series has exactly one match, extractall(pat).xs(0, level='match')
@@ -728,7 +728,7 @@ def str_extractall(arr, pat, flags=0):
Indices with no matches will not appear in the result.
>>> s = Series(["a1a2", "b1", "c1"], index=["A", "B", "C"])
>>> s.str.extractall("[ab](\d)")
>>> s.str.extractall(r"[ab](\d)")
0
match
A 0 1
@@ -737,7 +737,7 @@ def str_extractall(arr, pat, flags=0):
Capture group names are used for column names of the result.
>>> s.str.extractall("[ab](?P<digit>\d)")
>>> s.str.extractall(r"[ab](?P<digit>\d)")
digit
match
A 0 1
@@ -746,7 +746,7 @@ def str_extractall(arr, pat, flags=0):
A pattern with two groups will return a DataFrame with two columns.
>>> s.str.extractall("(?P<letter>[ab])(?P<digit>\d)")
>>> s.str.extractall(r"(?P<letter>[ab])(?P<digit>\d)")
letter digit
match
A 0 a 1
@@ -755,7 +755,7 @@ def str_extractall(arr, pat, flags=0):
Optional groups that do not match are NaN in the result.
>>> s.str.extractall("(?P<letter>[ab])?(?P<digit>\d)")
>>> s.str.extractall(r"(?P<letter>[ab])?(?P<digit>\d)")
letter digit
match
A 0 a 1
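
The docstring hunks here and in frame.py above are the same fix seen from another angle: a docstring is an ordinary string literal, so regex examples like ([ab])(\d) inside it need the r""" prefix (or doubled backslashes) to stay warning-free. A small standalone illustration with a hypothetical function:

    def extract_demo():
        r"""Show a regex in a docstring without escape warnings.

        The pattern ([ab])(\d) appears verbatim because the literal is raw.
        """

    # the backslash survives either way; raw just avoids the 3.6+ warning
    assert "\\d" in extract_demo.__doc__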
8 changes: 4 additions & 4 deletions pandas/io/clipboards.py
@@ -3,7 +3,7 @@
from pandas.compat import StringIO, PY2


-def read_clipboard(sep='\s+', **kwargs): # pragma: no cover
+def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover
r"""
Read text from clipboard and pass to read_table. See read_table for the
full argument list
@@ -55,10 +55,10 @@ def read_clipboard(sep='\s+', **kwargs): # pragma: no cover

counts = {x.lstrip().count('\t') for x in lines}
if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
-sep = '\t'
+sep = r'\t'

if sep is None and kwargs.get('delim_whitespace') is None:
-sep = '\s+'
+sep = r'\s+'

return read_table(StringIO(text), sep=sep, **kwargs)

@@ -99,7 +99,7 @@ def to_clipboard(obj, excel=None, sep=None, **kwargs): # pragma: no cover
if excel:
try:
if sep is None:
-sep = '\t'
+sep = r'\t'
buf = StringIO()
# clipboard_set (pyperclip) expects unicode
obj.to_csv(buf, sep=sep, encoding='utf-8', **kwargs)
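
One subtlety in this file: unlike r'\s+', which is byte-for-byte the same two characters as before, r'\t' is not the same value as '\t'; it is a two-character sequence (backslash, t) rather than a single tab. Per the parsers.py hunk below, multi-character separators other than '\s+' are interpreted as regular expressions, and the regex \t does match a literal tab, so parsing via read_table should behave the same, but the distinction is easy to miss:

    import re

    assert len('\t') == 1      # a real tab character
    assert len(r'\t') == 2     # backslash followed by 't'
    assert r'\t' != '\t'

    # as a regex, r'\t' still matches the tab
    assert re.match(r'\t', '\t') is not None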
6 changes: 3 additions & 3 deletions pandas/io/formats/format.py
@@ -1002,7 +1002,7 @@ def get_col_type(dtype):
buf.write('\\end{longtable}\n')

def _format_multicolumn(self, row, ilevels):
"""
r"""
Combine columns belonging to a group to a single multicolumn entry
according to self.multicolumn_format
@@ -1040,7 +1040,7 @@ def append_col():
return row2

def _format_multirow(self, row, ilevels, i, rows):
"""
r"""
Check following rows, whether row should be a multirow
e.g.: becomes:
@@ -1071,7 +1071,7 @@ def _print_cline(self, buf, i, icol):
"""
for cl in self.clinebuf:
if cl[0] == i:
-buf.write('\cline{{{cl:d}-{icol:d}}}\n'
+buf.write('\\cline{{{cl:d}-{icol:d}}}\n'
.format(cl=cl[1], icol=icol))
# remove entries that have been written to buffer
self.clinebuf = [x for x in self.clinebuf if x[0] != i]
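
The \cline change takes the doubled-backslash route instead of a raw string for a reason: the same literal ends in \n, which must remain a real newline; in a raw string it would turn into a literal backslash plus n. A standalone check of the formatting:

    # '\\cline' yields one backslash for LaTeX; '\n' stays a newline
    s = '\\cline{{{cl:d}-{icol:d}}}\n'.format(cl=1, icol=3)
    assert s == '\\cline{1-3}\n'
    assert s[-1] == '\n'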
8 changes: 4 additions & 4 deletions pandas/io/parsers.py
@@ -51,7 +51,7 @@
# so we need to remove it if we see it.
_BOM = u('\ufeff')

_parser_params = """Also supports optionally iterating or breaking of the file
_parser_params = r"""Also supports optionally iterating or breaking of the file
into chunks.
Additional help can be found in the `online docs for IO Tools
@@ -842,19 +842,19 @@ def _clean_options(self, options, engine):
" sep=None with delim_whitespace=False"
engine = 'python'
elif sep is not None and len(sep) > 1:
-if engine == 'c' and sep == '\s+':
+if engine == 'c' and sep == r'\s+':
result['delim_whitespace'] = True
del result['delimiter']
elif engine not in ('python', 'python-fwf'):
# wait until regex engine integrated
fallback_reason = "the 'c' engine does not support"\
" regex separators (separators > 1 char and"\
" different from '\s+' are"\
r" different from '\s+' are"\
" interpreted as regex)"
engine = 'python'
elif delim_whitespace:
if 'python' in engine:
-result['delimiter'] = '\s+'
+result['delimiter'] = r'\s+'
elif sep is not None:
encodeable = True
try:
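
The _clean_options logic above gives sep=r'\s+' special treatment: on the default 'c' engine it is rewritten as delim_whitespace=True, while any other multi-character separator forces a fallback to the 'python' engine, where it is treated as a regex. A usage sketch (hypothetical data, standard read_csv API):

    import pandas as pd
    from io import StringIO

    data = "a b\n1  2\n3\t4\n"

    # special-cased: stays on the fast C engine as delim_whitespace=True
    df = pd.read_csv(StringIO(data), sep=r'\s+')

    # any other regex separator needs (or falls back to) the python engine
    df2 = pd.read_csv(StringIO(data), sep=r'[ \t]+', engine='python')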
4 changes: 2 additions & 2 deletions pandas/io/pytables.py
@@ -1792,7 +1792,7 @@ def create_for_block(
# name values_0
try:
if version[0] == 0 and version[1] <= 10 and version[2] == 0:
m = re.search("values_block_(\d+)", name)
m = re.search(r"values_block_(\d+)", name)
if m:
name = "values_%s" % m.groups()[0]
except:
@@ -4297,7 +4297,7 @@ class AppendableMultiFrameTable(AppendableFrameTable):
table_type = u('appendable_multiframe')
obj_type = DataFrame
ndim = 2
-_re_levels = re.compile("^level_\d+$")
+_re_levels = re.compile(r"^level_\d+$")

@property
def table_type_short(self):
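
For context, the values_block pattern above backs a legacy rename (guarded by the version check in the hunk); its effect is easy to verify standalone:

    import re

    # legacy files named columns values_block_<n>; the search extracts <n>
    # so the name can be rewritten as values_<n>
    m = re.search(r"values_block_(\d+)", "values_block_0")
    assert "values_%s" % m.groups()[0] == "values_0"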
2 changes: 1 addition & 1 deletion pandas/io/sql.py
@@ -1306,7 +1306,7 @@ def _create_table_setup(self):
column_names_and_types = \
self._get_column_names_and_types(self._sql_type_name)

-pat = re.compile('\s+')
+pat = re.compile(r'\s+')
column_names = [col_name for col_name, _, _ in column_names_and_types]
if any(map(pat.search, column_names)):
warnings.warn(_SAFE_NAMES_WARNING, stacklevel=6)
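
The compiled \s+ pattern is used with search, so whitespace anywhere in a column name triggers _SAFE_NAMES_WARNING; a quick standalone check of that logic:

    import re

    pat = re.compile(r'\s+')
    assert pat.search('col name') is not None   # would trigger the warning
    assert pat.search('col_name') is None       # safe name, no warning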
6 changes: 3 additions & 3 deletions pandas/tests/indexes/interval/test_interval.py
@@ -599,7 +599,7 @@ def test_monotonic(self, closed):
assert idx.is_monotonic_decreasing
assert idx._is_strictly_monotonic_decreasing

-@pytest.mark.xfail(reason='not a valid repr as we use interval notation')
+@pytest.mark.skip(reason='not a valid repr as we use interval notation')
def test_repr(self):
i = IntervalIndex.from_tuples([(0, 1), (1, 2)], closed='right')
expected = ("IntervalIndex(left=[0, 1],"
@@ -619,11 +619,11 @@ def test_repr(self):
"\n dtype='interval[datetime64[ns]]')")
assert repr(i) == expected

-@pytest.mark.xfail(reason='not a valid repr as we use interval notation')
+@pytest.mark.skip(reason='not a valid repr as we use interval notation')
def test_repr_max_seq_item_setting(self):
super(TestIntervalIndex, self).test_repr_max_seq_item_setting()

-@pytest.mark.xfail(reason='not a valid repr as we use interval notation')
+@pytest.mark.skip(reason='not a valid repr as we use interval notation')
def test_repr_roundtrip(self):
super(TestIntervalIndex, self).test_repr_roundtrip()

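
The repr tests switch from xfail to skip, which is the stronger marker: an xfail test still executes (reporting XFAIL on failure, XPASS if it unexpectedly passes), while a skipped test never runs at all. Since these reprs can never be valid under interval notation, there is nothing useful to execute. A standalone illustration of the difference:

    import pytest

    @pytest.mark.xfail(reason='expected to fail, but still executed')
    def test_runs_and_reports_xfail():
        assert False

    @pytest.mark.skip(reason='never executed at all')
    def test_never_runs():
        assert False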
Empty file.
40 changes: 40 additions & 0 deletions pandas/tests/sparse/frame/test_analytics.py
@@ -0,0 +1,40 @@
import pytest
import numpy as np
from pandas import SparseDataFrame, DataFrame, SparseSeries
from pandas.util import testing as tm


@pytest.mark.xfail(reason='Wrong SparseBlock initialization '
                          '(GH 17386)')
def test_quantile():
    # GH 17386
    data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
    q = 0.1

    sparse_df = SparseDataFrame(data)
    result = sparse_df.quantile(q)

    dense_df = DataFrame(data)
    dense_expected = dense_df.quantile(q)
    sparse_expected = SparseSeries(dense_expected)

    tm.assert_series_equal(result, dense_expected)
    tm.assert_sp_series_equal(result, sparse_expected)


@pytest.mark.xfail(reason='Wrong SparseBlock initialization '
                          '(GH 17386)')
def test_quantile_multi():
    # GH 17386
    data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
    q = [0.1, 0.5]

    sparse_df = SparseDataFrame(data)
    result = sparse_df.quantile(q)

    dense_df = DataFrame(data)
    dense_expected = dense_df.quantile(q)
    sparse_expected = SparseDataFrame(dense_expected)

    tm.assert_frame_equal(result, dense_expected)
    tm.assert_sp_frame_equal(result, sparse_expected)