BUG: block mutation of read-only array in series

Author: Joe Jevnik <[email protected]> Closes #14359 from llllllllll/series-setitem and squashes the following commits: 9925327 [Joe Jevnik] BUG: fix a bug in Series.__setitem__ that allowed the mutatation of read-only arrays
pandas-dev · Oct 24, 2016 · 5cf6d94 · 5cf6d94
1 parent bee90a7
commit 5cf6d94
Show file tree

Hide file tree

Showing 5 changed files with 61 additions and 7 deletions.
diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt
@@ -46,6 +46,7 @@ Bug Fixes
 - Bug in ``RangeIndex.intersection`` when result is a empty set (:issue:`14364`).
 - Bug in union of differences from a ``DatetimeIndex`; this is a regression in 0.19.0 from 0.18.1 (:issue:`14323`)
 
+- Bug in ``Series.__setitem__`` which allowed mutating read-only arrays (:issue:`14359`).
 
 
 - Source installs from PyPI will now work without ``cython`` installed, as in previous versions (:issue:`14204`)
@@ -62,5 +63,4 @@ Bug Fixes
 - Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`)
 - Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue`14327`)
 
-- Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns``
-  is not scalar and ``values`` is not specified (:issue:`14380`)
+- Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns`` is not scalar and ``values`` is not specified (:issue:`14380`)
diff --git a/pandas/lib.pyx b/pandas/lib.pyx
@@ -975,7 +975,9 @@ def astype_intsafe(ndarray[object] arr, new_dtype):
         if is_datelike and checknull(v):
             result[i] = NPY_NAT
         else:
-            util.set_value_at(result, i, v)
+            # we can use the unsafe version because we know `result` is mutable
+            # since it was created from `np.empty`
+            util.set_value_at_unsafe(result, i, v)
 
     return result
 
@@ -986,7 +988,9 @@ cpdef ndarray[object] astype_unicode(ndarray arr):
         ndarray[object] result = np.empty(n, dtype=object)
 
     for i in range(n):
-        util.set_value_at(result, i, unicode(arr[i]))
+        # we can use the unsafe version because we know `result` is mutable
+        # since it was created from `np.empty`
+        util.set_value_at_unsafe(result, i, unicode(arr[i]))
 
     return result
 
@@ -997,7 +1001,9 @@ cpdef ndarray[object] astype_str(ndarray arr):
         ndarray[object] result = np.empty(n, dtype=object)
 
     for i in range(n):
-        util.set_value_at(result, i, str(arr[i]))
+        # we can use the unsafe version because we know `result` is mutable
+        # since it was created from `np.empty`
+        util.set_value_at_unsafe(result, i, str(arr[i]))
 
     return result
 

diff --git a/pandas/src/util.pxd b/pandas/src/util.pxd
@@ -70,7 +70,12 @@ cdef inline object get_value_at(ndarray arr, object loc):
 
     return get_value_1d(arr, i)
 
-cdef inline set_value_at(ndarray arr, object loc, object value):
+cdef inline set_value_at_unsafe(ndarray arr, object loc, object value):
+    """Sets a value into the array without checking the writeable flag.
+
+    This should be used when setting values in a loop, check the writeable
+    flag above the loop and then eschew the check on each iteration.
+    """
     cdef:
         Py_ssize_t i, sz
     if is_float_object(loc):
@@ -87,6 +92,14 @@ cdef inline set_value_at(ndarray arr, object loc, object value):
 
     assign_value_1d(arr, i, value)
 
+cdef inline set_value_at(ndarray arr, object loc, object value):
+    """Sets a value into the array after checking that the array is mutable.
+    """
+    if not cnp.PyArray_ISWRITEABLE(arr):
+        raise ValueError('assignment destination is read-only')
+
+    set_value_at_unsafe(arr, loc, value)
+
 cdef inline int is_contiguous(ndarray arr):
     return cnp.PyArray_CHKFLAGS(arr, cnp.NPY_C_CONTIGUOUS)
 

diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py
@@ -1947,6 +1947,40 @@ def test_multilevel_preserve_name(self):
         self.assertEqual(result.name, s.name)
         self.assertEqual(result2.name, s.name)
 
+    def test_setitem_scalar_into_readonly_backing_data(self):
+        # GH14359: test that you cannot mutate a read only buffer
+
+        array = np.zeros(5)
+        array.flags.writeable = False  # make the array immutable
+        series = Series(array)
+
+        for n in range(len(series)):
+            with self.assertRaises(ValueError):
+                series[n] = 1
+
+            self.assertEqual(
+                array[n],
+                0,
+                msg='even though the ValueError was raised, the underlying'
+                ' array was still mutated!',
+            )
+
+    def test_setitem_slice_into_readonly_backing_data(self):
+        # GH14359: test that you cannot mutate a read only buffer
+
+        array = np.zeros(5)
+        array.flags.writeable = False  # make the array immutable
+        series = Series(array)
+
+        with self.assertRaises(ValueError):
+            series[1:3] = 1
+
+        self.assertTrue(
+            not array.any(),
+            msg='even though the ValueError was raised, the underlying'
+            ' array was still mutated!',
+        )
+
 if __name__ == '__main__':
     import nose
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

diff --git a/setup.py b/setup.py
@@ -476,7 +476,8 @@ def pxd(name):
                          'pandas/src/period_helper.c']},
     index={'pyxfile': 'index',
            'sources': ['pandas/src/datetime/np_datetime.c',
-                       'pandas/src/datetime/np_datetime_strings.c']},
+                       'pandas/src/datetime/np_datetime_strings.c'],
+           'pxdfiles': ['src/util']},
     algos={'pyxfile': 'algos',
            'pxdfiles': ['src/util'],
            'depends': _pxi_dep['algos']},