CLN/DEPS: Clean up post numpy bump to 1.12 #23796
```diff
@@ -267,6 +267,8 @@ def maybe_promote(dtype, fill_value=np.nan):
         # for now: refuse to upcast datetime64
         # (this is because datetime64 will not implicitly upconvert
         # to object correctly as of numpy 1.6.1)
+        # TODO: remove old numpy compat code (without introducing segfault for
+        # tests/test_take.py::TestTake::test_2d_datetime64)
         if isna(fill_value):
             fill_value = iNaT
         else:
```

Review comment: So rather than do it this way, just make the top level 2 checks (one for timedelta, one for datetime); then you don't need to check twice.
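Background sketch (plain NumPy, not part of the diff): the reason a datetime64 block can be filled without upcasting to object is that NumPy reserves the minimum int64 value as its NaT sentinel, which is exactly what pandas' `iNaT` is.

```python
import numpy as np

# pandas' iNaT sentinel coincides with NumPy's NaT representation:
# the minimum int64 value, interpreted as nanoseconds since the epoch.
iNaT = np.iinfo(np.int64).min

print(np.datetime64(iNaT, 'ns'))  # NaT

# So a missing fill_value can live inside a datetime64 block directly,
# with no upcast to object dtype.
arr = np.array([0, iNaT], dtype='datetime64[ns]')
print(arr)        # ['1970-01-01T00:00:00.000000000' 'NaT']
print(arr.dtype)  # datetime64[ns]
```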
```diff
@@ -723,19 +725,30 @@ def astype_nansafe(arr, dtype, copy=True, skipna=False):

     elif is_object_dtype(arr):

-        # work around NumPy brokenness, #1987
-        if np.issubdtype(dtype.type, np.integer):
-            return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)
-
         # if we have a datetime/timedelta array of objects
         # then coerce to a proper dtype and recall astype_nansafe

+        if is_timedelta64_dtype(dtype):
+            # TODO: this is an old numpy compat branch that is not necessary
+            # anymore for its original purpose (unsafe casting from object to
+            # int, see GH 1987).
+            # Currently, timedelta dtypes get routed through here, whereas
+            # uncommenting the branch below would re-call
+            # >>> astype_nansafe(to_timedelta(arr).values, dtype, copy=copy)
+            # and end up in the `is_timedelta64_dtype(arr)` branch above, which
+            # explicitly and deliberately returns a float dtype.
+            # However, the test
+            # reshape/merge/test_merge.py::TestMerge::test_other_timedelta_unit
+            # expects an explicit timedelta dtype as output.
+            # Once this is fixed, `astype_intsafe` can be deleted from lib.
+            return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)
+
         elif is_datetime64_dtype(dtype):
             from pandas import to_datetime
             return astype_nansafe(to_datetime(arr).values, dtype, copy=copy)
-        elif is_timedelta64_dtype(dtype):
-            from pandas import to_timedelta
-            return astype_nansafe(to_timedelta(arr).values, dtype, copy=copy)
+        # elif is_timedelta64_dtype(dtype):
+        #     from pandas import to_timedelta
+        #     return astype_nansafe(to_timedelta(arr).values, dtype, copy=copy)

     if dtype.name in ("datetime64", "timedelta64"):
         msg = ("The '{dtype}' dtype has no unit. "
```

Review comment: @jbrockmendel Could you have a look at this? I couldn't fix it easily, and I'm not sure how relevant that test is.

Reply: Yikes. Not sure off the top what's going on here. Why the comment saying it is no longer necessary? np.timedelta64 is still a subdtype of np.integer. To the extent that this branch is just for timedelta64 dtypes, I would make that explicit instead of checking …

Reply: The original comment (and issue) was about unsafe casting from object to int. That's not the case for timedelta types, and even more so, there's a dedicated branch for such dtypes below that is never hit (and actually leads to a test failure in …).

Review comment: These comments are way too long. What exactly are you trying to do here? Let's leave this entire casting as is for now, without any changes; this needs a separate follow-up.

Reply: I'm trying to leave some breadcrumbs, as I spent 2-3 hours trying to investigate this. I've shortened the comment now.

Reply: I would rather you not, and just revert back to what it was. If you want to make an issue, OK.
```diff
@@ -408,6 +408,7 @@ def array_equivalent(left, right, strict_nan=False):

     # Object arrays can contain None, NaN and NaT.
     # string dtypes must be come to this path for NumPy 1.7.1 compat
+    # TODO: remove old numpy compat code (or comment)
     if is_string_dtype(left) or is_string_dtype(right):

         if not strict_nan:
```

Review comment: In this case, I'm not sure what's to be done here (expand below; no numpy code to be found).

Reply: Remove this comment.

Reply: I don't think that "for NumPy 1.7.1 compat" should be left standing, but OK.
```diff
@@ -1458,11 +1458,6 @@ def quantile(self, qs, interpolation='linear', axis=0, axes=None):

         def _nanpercentile1D(values, mask, q, **kw):
-            # mask is Union[ExtensionArray, ndarray]
-            # we convert to an ndarray for NumPy 1.9 compat, which didn't
-            # treat boolean-like arrays as boolean. This conversion would have
-            # been done inside ndarray.__getitem__ anyway, since values is
-            # an ndarray at this point.
             mask = np.asarray(mask)
             values = values[~mask]

             if len(values) == 0:
```
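The retained `mask = np.asarray(mask)` plus boolean indexing is all the NumPy machinery this path needs; a minimal sketch of the masking step:

```python
import numpy as np

values = np.array([1.0, np.nan, 3.0, np.nan])
mask = np.isnan(values)

# np.asarray is a no-op for an ndarray mask; the point of keeping it is
# that the mask may arrive as a boolean-like ExtensionArray.
mask = np.asarray(mask)
print(np.percentile(values[~mask], 50))  # 2.0 over the non-NA values
```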
```diff
@@ -2781,9 +2776,7 @@ def set(self, locs, values, check=False):
         -------
         None
         """
-        if values.dtype != _NS_DTYPE:
-            # Workaround for numpy 1.6 bug
-            values = conversion.ensure_datetime64ns(values)
+        values = conversion.ensure_datetime64ns(values, copy=False)

         self.values[locs] = values

```
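The dropped dtype check was redundant: normalizing to nanosecond datetime64 is effectively a pass-through when the input already has that unit. A rough NumPy-only stand-in for what `conversion.ensure_datetime64ns` provides (the `to_ns` helper is hypothetical and ignores the overflow checks the real routine performs):

```python
import numpy as np

def to_ns(values, copy=False):
    # Hypothetical sketch of ensure_datetime64ns: normalize any
    # datetime64 unit to nanoseconds, copying only when needed.
    if values.dtype == np.dtype('M8[ns]') and not copy:
        return values
    return values.astype('M8[ns]')

us = np.array(['2018-01-01'], dtype='M8[us]')
ns = to_ns(us)
print(ns.dtype)         # datetime64[ns]
print(to_ns(ns) is ns)  # True: already-ns input passes through untouched
```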
```diff
@@ -3102,7 +3095,7 @@ def _merge_blocks(blocks, dtype=None, _can_consolidate=True):
         # FIXME: optimization potential in case all mgrs contain slices and
         # combination of those slices is a slice, too.
         new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks])
-        new_values = _vstack([b.values for b in blocks], dtype)
+        new_values = np.vstack([b.values for b in blocks])

         argsort = np.argsort(new_mgr_locs)
         new_values = new_values[argsort]
```
```diff
@@ -3114,17 +3107,6 @@ def _merge_blocks(blocks, dtype=None, _can_consolidate=True):
     return blocks


-def _vstack(to_stack, dtype):
-
-    # work around NumPy 1.6 bug
-    if dtype == _NS_DTYPE or dtype == _TD_DTYPE:
-        new_values = np.vstack([x.view('i8') for x in to_stack])
-        return new_values.view(dtype)
-
-    else:
-        return np.vstack(to_stack)
-
-
 def _block2d_to_blocknd(values, placement, shape, labels, ref_items):
     """ pivot to the labels shape """
     panel_shape = (len(placement),) + shape
```

Review comment: This was only used once, and doesn't need to be defined at all once the shim isn't necessary anymore.
```diff
@@ -248,9 +248,6 @@ def __getstate__(self):

     def __setstate__(self, state):
         def unpickle_block(values, mgr_locs):
-            # numpy < 1.7 pickle compat
-            if values.dtype == 'M8[us]':
-                values = values.astype('M8[ns]')
             return make_block(values, placement=mgr_locs)

         if (isinstance(state, tuple) and len(state) >= 4 and
```
```diff
@@ -776,18 +773,6 @@ def _interleave(self):

         result = np.empty(self.shape, dtype=dtype)

-        if result.shape[0] == 0:
-            # Workaround for numpy 1.7 bug:
-            #
-            #     >>> a = np.empty((0,10))
-            #     >>> a[slice(0,0)]
-            #     array([], shape=(0, 10), dtype=float64)
-            #     >>> a[[]]
-            #     Traceback (most recent call last):
-            #       File "<stdin>", line 1, in <module>
-            #     IndexError: index 0 is out of bounds for axis 0 with size 0
-            return result
-
         itemmask = np.zeros(self.shape[0])

         for blk in self.blocks:
```
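The removed workaround is easy to exercise directly; on any NumPy this PR targets (>= 1.12), both indexing forms succeed:

```python
import numpy as np

a = np.empty((0, 10))

# Both slice and fancy indexing of an empty first axis return an empty
# (0, 10) result; the IndexError quoted above no longer occurs.
print(a[slice(0, 0)].shape)  # (0, 10)
print(a[[]].shape)           # (0, 10)
```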
```diff
@@ -1170,8 +1155,7 @@ def insert(self, loc, item, value, allow_duplicates=False):
             blk.mgr_locs = new_mgr_locs

         if loc == self._blklocs.shape[0]:
-            # np.append is a lot faster (at least in numpy 1.7.1), let's use it
-            # if we can.
+            # np.append is a lot faster, let's use it if we can.
             self._blklocs = np.append(self._blklocs, 0)
             self._blknos = np.append(self._blknos, len(self.blocks))
         else:
```

Review comment: No code changed here, just the comment.
```diff
@@ -1995,13 +1979,9 @@ def _transform_index(index, func, level=None):

 def _fast_count_smallints(arr):
     """Faster version of set(arr) for sequences of small numbers."""
-    if len(arr) == 0:
-        # Handle empty arr case separately: numpy 1.6 chokes on that.
-        return np.empty((0, 2), dtype=arr.dtype)
-    else:
-        counts = np.bincount(arr.astype(np.int_))
-        nz = counts.nonzero()[0]
-        return np.c_[nz, counts[nz]]
+    counts = np.bincount(arr.astype(np.int_))
+    nz = counts.nonzero()[0]
+    return np.c_[nz, counts[nz]]


 def _preprocess_slice_or_indexer(slice_or_indexer, length, allow_fill):
```

Review comment: Is this used often? Is it worth keeping as a standalone function vs. inlining?

Reply: Got no strong opinion on that.

Reply: It's called twice as an operand for a loop, so OK to keep.
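A standalone copy of the simplified helper (reproduced here for illustration), showing why the empty-input special case could go: modern `np.bincount` accepts empty integer input:

```python
import numpy as np

def fast_count_smallints(arr):
    # Rows of (value, count) for every value with a non-zero count.
    counts = np.bincount(arr.astype(np.int_))
    nz = counts.nonzero()[0]
    return np.c_[nz, counts[nz]]

print(fast_count_smallints(np.array([1, 1, 3])))
# [[1 2]
#  [3 1]]

# Empty input simply yields zero rows; no special case needed.
print(fast_count_smallints(np.array([], dtype=np.int_)).shape)  # (0, 2)
```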
```diff
@@ -251,7 +251,7 @@ def dtype_for(t):
             'complex128': np.float64,
             'complex64': np.float32}

-# numpy 1.6.1 compat
+# windows (32 bit) compat
 if hasattr(np, 'float128'):
     c2f_dict['complex256'] = np.float128
```

Review comment: This is not just numpy compat (hence the removed comment) - including …

Reply: Maybe a comment to that effect? "windows compat" or "windows 32 bit compat" or "[whatever would be accurate] compat"?

Reply: OK.
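A minimal sketch of the guarded mapping, assuming only NumPy: the extended-precision types exist or not depending on the platform's long double, not on the NumPy version:

```python
import numpy as np

c2f_dict = {'complex128': np.float64,
            'complex64': np.float32}

# np.float128 / np.complex256 are only defined where the C long double
# is extended-precision; 32-bit Windows builds lack them, hence the
# hasattr guard rather than a NumPy version check.
if hasattr(np, 'float128'):
    c2f_dict['complex256'] = np.float128

print(sorted(c2f_dict))
```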
```diff
@@ -37,7 +37,6 @@ def test_big_print(self):
     def test_empty_print(self):
         factor = Categorical([], ["a", "b", "c"])
         expected = ("[], Categories (3, object): [a, b, c]")
-        # hack because array_repr changed in numpy > 1.6.x
         actual = repr(factor)
         assert actual == expected
```

Review comment: Fair enough to say that the repr of np.array has arrived, no? No need to call getting a list from it a hack, IMO.

Review comment (on the maybe_promote TODO above): Replacing this compat code with … leads to a segfault in tests/test_take.py::TestTake::test_2d_datetime64; I have not investigated further.