Skip to content

Commit

Permalink
ENH: DataFrame.from_records takes iterator #1794
Browse files Browse the repository at this point in the history
  • Loading branch information
Chang She authored and wesm committed Nov 20, 2012
1 parent b8dae94 commit 4a6e6d2
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 1 deletion.
29 changes: 28 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -872,7 +872,7 @@ def to_dict(self, outtype='dict'):

@classmethod
def from_records(cls, data, index=None, exclude=None, columns=None,
coerce_float=False):
coerce_float=False, nrows=None):
"""
Convert structured or record ndarray to DataFrame
Expand Down Expand Up @@ -906,6 +906,33 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
raise ValueError('Non-unique columns not yet supported in '
'from_records')

if com.is_iterator(data):
if nrows == 0:
return DataFrame()

try:
first_row = data.next()
except StopIteration:
return DataFrame(index=index, columns=columns)

dtype = None
if hasattr(first_row, 'dtype') and first_row.dtype.names:
dtype = first_row.dtype

values = [first_row]

i = 1
for row in data:
values.append(row)
i += 1
if i >= nrows:
break

if dtype is not None:
data = np.array(values, dtype=dtype)
else:
data = values

if isinstance(data, (np.ndarray, DataFrame, dict)):
keys, sdict = _rec_to_dict(data)
if columns is None:
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2639,6 +2639,19 @@ def test_from_records_nones(self):
df = DataFrame.from_records(tuples, columns=['a', 'b', 'c', 'd'])
self.assert_(np.isnan(df['c'][0]))

def test_from_records_iterator(self):
arr = np.array([(1.0, 2), (3.0, 4), (5., 6), (7., 8)],
dtype=[('x', float), ('y', int)])
df = DataFrame.from_records(iter(arr), nrows=2)
xp = DataFrame({'x' : np.array([1.0, 3.0], dtype=float),
'y' : np.array([2, 4], dtype=int)})
assert_frame_equal(df, xp)

arr = [(1.0, 2), (3.0, 4), (5., 6), (7., 8)]
df = DataFrame.from_records(iter(arr), columns=['x', 'y'],
nrows=2)
assert_frame_equal(df, xp)

def test_from_records_columns_not_modified(self):
tuples = [(1, 2, 3),
(1, 2, 3),
Expand Down

0 comments on commit 4a6e6d2

Please sign in to comment.