Skip to content

Commit

Permalink
CLN: remove unneeded code in internals; use split_and_operate when pos…
Browse files Browse the repository at this point in the history
…sible
  • Loading branch information
jreback committed Jul 12, 2017
1 parent 7b1fd80 commit 054e89e
Showing 1 changed file with 69 additions and 92 deletions.
161 changes: 69 additions & 92 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,12 +389,7 @@ def fillna(self, value, limit=None, inplace=False, downcast=None,
blocks = [b.make_block(values=self._try_coerce_result(b.values))
for b in blocks]
return self._maybe_downcast(blocks, downcast)
except (TypeError, ValueError) as e:

if self.is_timedelta:
raise AssertionError(
"causing recursion error: {} {} {} {}".format(
self, value, type(value), e))
except (TypeError, ValueError):

# we can't process the value, but nothing to do
if not mask.any():
Expand Down Expand Up @@ -431,6 +426,8 @@ def split_and_operate(self, mask, f, inplace):
list of blocks
"""

if mask is None:
mask = np.ones(self.shape, dtype=bool)
new_values = self.values

def make_a_block(nv, ref_loc):
Expand All @@ -441,7 +438,10 @@ def make_a_block(nv, ref_loc):
else:
# Put back the dimension that was taken from it and make
# a block out of the result.
nv = _block_shape(nv, ndim=self.ndim)
try:
nv = _block_shape(nv, ndim=self.ndim)
except (AttributeError, NotImplementedError):
pass
block = self.make_block(values=nv,
placement=ref_loc, fastpath=True)
return block
Expand Down Expand Up @@ -512,27 +512,20 @@ def downcast(self, dtypes=None, mgr=None):
raise ValueError("downcast must have a dictionary or 'infer' as "
"its argument")

# item-by-item
# operate column-by-column
# this is expensive as it splits the blocks item-by-item
blocks = []
for i, rl in enumerate(self.mgr_locs):
def f(m, v, i):

if dtypes == 'infer':
dtype = 'infer'
else:
raise AssertionError("dtypes as dict is not supported yet")
# TODO: This either should be completed or removed
dtype = dtypes.get(item, self._downcast_dtype) # noqa

if dtype is None:
nv = _block_shape(values[i], ndim=self.ndim)
else:
nv = maybe_downcast_to_dtype(values[i], dtype)
nv = _block_shape(nv, ndim=self.ndim)

blocks.append(self.make_block(nv, fastpath=True, placement=[rl]))
if dtype is not None:
v = maybe_downcast_to_dtype(v, dtype)
return v

return blocks
return self.split_and_operate(None, f, False)

def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
return self._astype(dtype, copy=copy, errors=errors, values=values,
Expand Down Expand Up @@ -655,10 +648,6 @@ def _try_cast_result(self, result, dtype=None):
# may need to change the dtype here
return maybe_downcast_to_dtype(result, dtype)

def _try_operate(self, values):
""" return a version to operate on as the input """
return values

def _try_coerce_args(self, values, other):
""" provide coercion to our input arguments """

Expand All @@ -680,9 +669,6 @@ def _try_coerce_and_cast_result(self, result, dtype=None):
result = self._try_cast_result(result, dtype=dtype)
return result

def _try_fill(self, value):
return value

def to_native_types(self, slicer=None, na_rep='nan', quoting=None,
**kwargs):
""" convert to our native types format, slicing if desired """
Expand Down Expand Up @@ -1120,7 +1106,6 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,

values = self.values if inplace else self.values.copy()
values, _, fill_value, _ = self._try_coerce_args(values, fill_value)
values = self._try_operate(values)
values = missing.interpolate_2d(values, method=method, axis=axis,
limit=limit, fill_value=fill_value,
dtype=self.dtype)
Expand Down Expand Up @@ -1834,10 +1819,6 @@ def _na_value(self):
def fill_value(self):
return tslib.iNaT

def _try_operate(self, values):
""" return a version to operate on """
return values.view('i8')

def get_values(self, dtype=None):
"""
return object dtype as boxed values, such as Timestamps/Timedelta
Expand Down Expand Up @@ -2031,31 +2012,24 @@ def convert(self, *args, **kwargs):
if key in kwargs:
fn_kwargs[key] = kwargs[key]

# attempt to create new type blocks
blocks = []
if by_item and not self._is_single_block:

for i, rl in enumerate(self.mgr_locs):
values = self.iget(i)
# operate column-by-column
def f(m, v, i):
shape = v.shape
values = fn(v.ravel(), **fn_kwargs)
try:
values = values.reshape(shape)
values = _block_shape(values, ndim=self.ndim)
except (AttributeError, NotImplementedError):
pass

shape = values.shape
values = fn(values.ravel(), **fn_kwargs)
try:
values = values.reshape(shape)
values = _block_shape(values, ndim=self.ndim)
except (AttributeError, NotImplementedError):
pass
newb = make_block(values, ndim=self.ndim, placement=[rl])
blocks.append(newb)
return values

if by_item and not self._is_single_block:
blocks = self.split_and_operate(None, f, False)
else:
values = fn(self.values.ravel(), **fn_kwargs)
try:
values = values.reshape(self.values.shape)
except NotImplementedError:
pass
blocks.append(make_block(values, ndim=self.ndim,
placement=self.mgr_locs))
values = f(None, self.values.ravel(), None)
blocks = [make_block(values, ndim=self.ndim,
placement=self.mgr_locs)]

return blocks

Expand Down Expand Up @@ -4878,17 +4852,30 @@ def _transform_index(index, func, level=None):

def _putmask_smart(v, m, n):
"""
Return a new block, try to preserve dtype if possible.
Return a new ndarray, try to preserve dtype if possible.
Parameters
----------
v : `values`, updated in-place (array like)
m : `mask`, applies to both sides (array like)
n : `new values` either scalar or an array like aligned with `values`
Returns
-------
values : ndarray with updated values
this *may* be a copy of the original
See Also
--------
ndarray.putmask
"""

# we cannot use np.asarray() here as we cannot have conversions
# that numpy does when numeric are mixed with strings

# n should be an array of the same length as the mask, or a scalar, here
if not is_list_like(n):
n = np.array([n] * len(m))
n = np.repeat(n, len(m))
elif isinstance(n, np.ndarray) and n.ndim == 0: # numpy scalar
n = np.repeat(np.array(n, ndmin=1), len(m))

Expand All @@ -4907,56 +4894,46 @@ def _putmask_smart(v, m, n):
nn_at = nn.astype(v.dtype)

# avoid invalid dtype comparisons
if not is_numeric_v_string_like(nn, nn_at):

# only compare integers/floats
# don't compare integers to datetimelikes
if (is_float_dtype(nn.dtype) or
is_integer_dtype(nn.dtype) and
is_float_dtype(nn_at.dtype) or
is_integer_dtype(nn_at.dtype)):
comp = (nn == nn_at)
if is_list_like(comp) and comp.all():
nv = v.copy()
nv[m] = nn_at
return nv
# between numbers & strings

# only compare integers/floats
# don't compare integers to datetimelikes
if (not is_numeric_v_string_like(nn, nn_at) and
(is_float_dtype(nn.dtype) or
is_integer_dtype(nn.dtype) and
is_float_dtype(nn_at.dtype) or
is_integer_dtype(nn_at.dtype))):

comp = (nn == nn_at)
if is_list_like(comp) and comp.all():
nv = v.copy()
nv[m] = nn_at
return nv
except (ValueError, IndexError, TypeError):
pass

n = np.asarray(n)

# preserves dtype if possible
if v.dtype.kind == n.dtype.kind:
def _putmask_preserve(nv, n):
try:
v[m] = n[m]
return v
except:
pass
nv[m] = n[m]
except (IndexError, ValueError):
nv[m] = n
return nv

try:
v[m] = n
return v
except:
pass
# preserves dtype if possible
if v.dtype.kind == n.dtype.kind:
return _putmask_preserve(v, n)

# change the dtype if needed
dtype, _ = maybe_promote(n.dtype)

if is_extension_type(v.dtype) and is_object_dtype(dtype):
nv = v.get_values(dtype)
v = v.get_values(dtype)
else:
nv = v.astype(dtype)
v = v.astype(dtype)

try:
nv[m] = n[m]
except ValueError:
idx, = np.where(np.squeeze(m))
for mask_index, new_val in zip(idx, n):
nv[mask_index] = new_val
except IndexError:
nv[m] = n

return nv
return _putmask_preserve(v, n)


def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy):
Expand Down

0 comments on commit 054e89e

Please sign in to comment.