Skip to content

Commit

Permalink
BUG: block mutation of read-only array in series
Browse files Browse the repository at this point in the history
Author: Joe Jevnik <[email protected]>

Closes #14359 from llllllllll/series-setitem and squashes the following commits:

9925327 [Joe Jevnik] BUG: fix a bug in Series.__setitem__ that allowed the mutatation of read-only arrays
  • Loading branch information
Joe Jevnik authored and jreback committed Oct 24, 2016
1 parent bee90a7 commit 5cf6d94
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 7 deletions.
4 changes: 2 additions & 2 deletions doc/source/whatsnew/v0.19.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Bug Fixes
- Bug in ``RangeIndex.intersection`` when result is a empty set (:issue:`14364`).
- Bug in union of differences from a ``DatetimeIndex`; this is a regression in 0.19.0 from 0.18.1 (:issue:`14323`)

- Bug in ``Series.__setitem__`` which allowed mutating read-only arrays (:issue:`14359`).


- Source installs from PyPI will now work without ``cython`` installed, as in previous versions (:issue:`14204`)
Expand All @@ -62,5 +63,4 @@ Bug Fixes
- Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`)
- Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue`14327`)

- Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns``
is not scalar and ``values`` is not specified (:issue:`14380`)
- Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns`` is not scalar and ``values`` is not specified (:issue:`14380`)
12 changes: 9 additions & 3 deletions pandas/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -975,7 +975,9 @@ def astype_intsafe(ndarray[object] arr, new_dtype):
if is_datelike and checknull(v):
result[i] = NPY_NAT
else:
util.set_value_at(result, i, v)
# we can use the unsafe version because we know `result` is mutable
# since it was created from `np.empty`
util.set_value_at_unsafe(result, i, v)

return result

Expand All @@ -986,7 +988,9 @@ cpdef ndarray[object] astype_unicode(ndarray arr):
ndarray[object] result = np.empty(n, dtype=object)

for i in range(n):
util.set_value_at(result, i, unicode(arr[i]))
# we can use the unsafe version because we know `result` is mutable
# since it was created from `np.empty`
util.set_value_at_unsafe(result, i, unicode(arr[i]))

return result

Expand All @@ -997,7 +1001,9 @@ cpdef ndarray[object] astype_str(ndarray arr):
ndarray[object] result = np.empty(n, dtype=object)

for i in range(n):
util.set_value_at(result, i, str(arr[i]))
# we can use the unsafe version because we know `result` is mutable
# since it was created from `np.empty`
util.set_value_at_unsafe(result, i, str(arr[i]))

return result

Expand Down
15 changes: 14 additions & 1 deletion pandas/src/util.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,12 @@ cdef inline object get_value_at(ndarray arr, object loc):

return get_value_1d(arr, i)

cdef inline set_value_at(ndarray arr, object loc, object value):
cdef inline set_value_at_unsafe(ndarray arr, object loc, object value):
"""Sets a value into the array without checking the writeable flag.
This should be used when setting values in a loop, check the writeable
flag above the loop and then eschew the check on each iteration.
"""
cdef:
Py_ssize_t i, sz
if is_float_object(loc):
Expand All @@ -87,6 +92,14 @@ cdef inline set_value_at(ndarray arr, object loc, object value):

assign_value_1d(arr, i, value)

cdef inline set_value_at(ndarray arr, object loc, object value):
"""Sets a value into the array after checking that the array is mutable.
"""
if not cnp.PyArray_ISWRITEABLE(arr):
raise ValueError('assignment destination is read-only')

set_value_at_unsafe(arr, loc, value)

cdef inline int is_contiguous(ndarray arr):
return cnp.PyArray_CHKFLAGS(arr, cnp.NPY_C_CONTIGUOUS)

Expand Down
34 changes: 34 additions & 0 deletions pandas/tests/series/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1947,6 +1947,40 @@ def test_multilevel_preserve_name(self):
self.assertEqual(result.name, s.name)
self.assertEqual(result2.name, s.name)

def test_setitem_scalar_into_readonly_backing_data(self):
# GH14359: test that you cannot mutate a read only buffer

array = np.zeros(5)
array.flags.writeable = False # make the array immutable
series = Series(array)

for n in range(len(series)):
with self.assertRaises(ValueError):
series[n] = 1

self.assertEqual(
array[n],
0,
msg='even though the ValueError was raised, the underlying'
' array was still mutated!',
)

def test_setitem_slice_into_readonly_backing_data(self):
# GH14359: test that you cannot mutate a read only buffer

array = np.zeros(5)
array.flags.writeable = False # make the array immutable
series = Series(array)

with self.assertRaises(ValueError):
series[1:3] = 1

self.assertTrue(
not array.any(),
msg='even though the ValueError was raised, the underlying'
' array was still mutated!',
)

if __name__ == '__main__':
import nose
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,8 @@ def pxd(name):
'pandas/src/period_helper.c']},
index={'pyxfile': 'index',
'sources': ['pandas/src/datetime/np_datetime.c',
'pandas/src/datetime/np_datetime_strings.c']},
'pandas/src/datetime/np_datetime_strings.c'],
'pxdfiles': ['src/util']},
algos={'pyxfile': 'algos',
'pxdfiles': ['src/util'],
'depends': _pxi_dep['algos']},
Expand Down

0 comments on commit 5cf6d94

Please sign in to comment.