Skip to content

Commit

Permalink
COMPAT: avoid calling getsizeof() on PyPy
Browse files Browse the repository at this point in the history
  • Loading branch information
mattip committed Aug 11, 2017
1 parent 3e9e947 commit 27487c1
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 20 deletions.
7 changes: 4 additions & 3 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
_indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='',
unique='IndexOpsMixin', duplicated='IndexOpsMixin')

import platform
IS_PYPY = platform.python_implementation() == 'PyPy'

class StringMixin(object):
"""implements string methods so long as object defines a `__unicode__`
Expand Down Expand Up @@ -1061,7 +1063,7 @@ def memory_usage(self, deep=False):
Notes
-----
Memory usage does not include memory consumed by elements that
are not components of the array if deep=False
are not components of the array if deep=False or if used on PyPy
See Also
--------
Expand All @@ -1071,9 +1073,8 @@ def memory_usage(self, deep=False):
return self.values.memory_usage(deep=deep)

v = self.values.nbytes
if deep and is_object_dtype(self):
if deep and is_object_dtype(self) and not IS_PYPY:
v += lib.memory_usage_of_objects(self.values)

return v

def factorize(self, sort=False, na_sentinel=-1):
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,8 @@ def _nbytes(self, deep=False):
"""
level_nbytes = sum((i.memory_usage(deep=deep) for i in self.levels))
label_nbytes = sum((i.nbytes for i in self.labels))
names_nbytes = sum((getsizeof(i) for i in self.names))
objsize = 24 # for implementations with no meaningful getsizeof (PyPy)
names_nbytes = sum((getsizeof(i, 24) for i in self.names))
result = level_nbytes + label_nbytes + names_nbytes

# include our engine hashtable
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,11 @@ def _format_data(self):

@cache_readonly
def nbytes(self):
""" return the number of bytes in the underlying data """
return sum([getsizeof(getattr(self, v)) for v in
""" return the number of bytes in the underlying data
On implementations where getsizeof is problematic (PyPy),
assume 24 bytes for each value.
"""
return sum([getsizeof(getattr(self, v), 24) for v in
['_start', '_stop', '_step']])

def memory_usage(self, deep=False):
Expand Down
23 changes: 13 additions & 10 deletions pandas/tests/frame/test_repr_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,13 +332,15 @@ def test_info_memory_usage(self):
res = buf.getvalue().splitlines()
assert re.match(r"memory usage: [^+]+$", res[-1])

assert (df_with_object_index.memory_usage(
index=True, deep=True).sum() > df_with_object_index.memory_usage(
index=True).sum())
if not tm.IS_PYPY:
assert (df_with_object_index.memory_usage(
index=True,
deep=True).sum() > df_with_object_index.memory_usage(
index=True).sum())

df_object = pd.DataFrame({'a': ['a']})
assert (df_object.memory_usage(deep=True).sum() >
df_object.memory_usage().sum())
df_object = pd.DataFrame({'a': ['a']})
assert (df_object.memory_usage(deep=True).sum() >
df_object.memory_usage().sum())

# Test a DataFrame with duplicate columns
dtypes = ['int64', 'int64', 'int64', 'float64']
Expand Down Expand Up @@ -377,10 +379,11 @@ def test_info_memory_usage(self):
df.memory_usage(index=True)
df.index.values.nbytes

# sys.getsizeof will call the .memory_usage with
# deep=True, and add on some GC overhead
diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df)
assert abs(diff) < 100
if not tm.IS_PYPY:
# sys.getsizeof will call the .memory_usage with
# deep=True, and add on some GC overhead
diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df)
assert abs(diff) < 100

def test_info_memory_usage_qualified(self):

Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ def f():

pytest.raises(TypeError, f)

@pytest.mark.skipif(tm.IS_PYPY, "not relevant for PyPy")
def test_memory_usage(self):
# Delegate does not implement memory_usage.
# Check that we fall back to in-built `__sizeof__`
Expand Down Expand Up @@ -941,6 +942,7 @@ def test_fillna(self):
# check shallow_copied
assert o is not result

@pytest.mark.skipif(tm.IS_PYPY, "not relevant for PyPy")
def test_memory_usage(self):
for o in self.objs:
res = o.memory_usage()
Expand Down
9 changes: 5 additions & 4 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1448,10 +1448,11 @@ def test_memory_usage(self):
cat = pd.Categorical(['foo', 'foo', 'bar'])
assert cat.memory_usage(deep=True) > cat.nbytes

# sys.getsizeof will call the .memory_usage with
# deep=True, and add on some GC overhead
diff = cat.memory_usage(deep=True) - sys.getsizeof(cat)
assert abs(diff) < 100
if not tm.IS_PYPY:
# sys.getsizeof will call the .memory_usage with
# deep=True, and add on some GC overhead
diff = cat.memory_usage(deep=True) - sys.getsizeof(cat)
assert abs(diff) < 100

def test_searchsorted(self):
# https://github.com/pandas-dev/pandas/issues/8420
Expand Down
2 changes: 2 additions & 0 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@
K = 4
_RAISE_NETWORK_ERROR_DEFAULT = False

import platform
IS_PYPY = platform.python_implementation() == 'PyPy'

# set testing_mode
_testing_mode_warnings = (DeprecationWarning, compat.ResourceWarning)
Expand Down

0 comments on commit 27487c1

Please sign in to comment.