pandas-dev · tinproject · Jan 10, 2014
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -16,6 +16,7 @@
 import collections
 import warnings
 import types
+from itertools import islice, chain
 
 from numpy import nan as NA
 import numpy as np
@@ -159,6 +160,8 @@ class DataFrame(NDFrame):
         Data type to force, otherwise infer
     copy : boolean, default False
         Copy data from inputs. Only affects DataFrame / 2d ndarray input
+    count : int or None, when data is a generator, number of values to
+        read. If None, reads the whole generator
 
     Examples
     --------
@@ -185,7 +188,7 @@ def _constructor(self):
     _constructor_sliced = Series
 
     def __init__(self, data=None, index=None, columns=None, dtype=None,
-                 copy=False):
+                 copy=False, count=None):
         if data is None:
             data = {}
         if dtype is not None:
@@ -232,7 +235,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
                                          copy=copy)
         elif isinstance(data, (list, types.GeneratorType)):
             if isinstance(data, types.GeneratorType):
-                data = list(data)
+                data = list(islice(data, count))
             if len(data) > 0:
                 if index is None and isinstance(data[0], Series):
                     index = _get_names_from_index(data)
@@ -705,7 +708,7 @@ def to_gbq(self, destination_table, schema=None, col_order=None,
 
     @classmethod
     def from_records(cls, data, index=None, exclude=None, columns=None,
-                     coerce_float=False, nrows=None):
+                     coerce_float=False, count=None, nrows=None):
         """
         Convert structured or record ndarray to DataFrame
 
@@ -726,44 +729,38 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
         coerce_float : boolean, default False
             Attempt to convert values to non-string, non-numeric objects (like
             decimal.Decimal) to floating point, useful for SQL result sets
+        count : int or None, number of records to read from an iterator.
+            If None, reads the whole iterator
 
         Returns
         -------
         df : DataFrame
         """
+        # nrows was never documented and is deprecated; forward it to count
+        if nrows is not None:
+            warnings.warn("nrows is deprecated, use count",
+                          FutureWarning)
+            count = nrows
+
         # Make a copy of the input columns so we can modify it
         if columns is not None:
             columns = _ensure_index(columns)
 
         if com.is_iterator(data):
-            if nrows == 0:
+            if count == 0:
                 return cls()
 
             try:
-                if compat.PY3:
-                    first_row = next(data)
-                else:
-                    first_row = next(data)
+                first_row = next(data)
             except StopIteration:
                 return cls(index=index, columns=columns)
 
             dtype = None
             if hasattr(first_row, 'dtype') and first_row.dtype.names:
                 dtype = first_row.dtype
 
-            values = [first_row]
-
-            # if unknown length iterable (generator)
-            if nrows is None:
-                # consume whole generator
-                values += list(data)
-            else:
-                i = 1
-                for row in data:
-                    values.append(row)
-                    i += 1
-                    if i >= nrows:
-                        break
+            # re-attach the consumed first_row, then keep at most count rows
+            values = list(islice(chain([first_row], data), count))
 
             if dtype is not None:
                 data = np.array(values, dtype=dtype)

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2,6 +2,7 @@
 Data structure for 1-dimensional cross-sectional and time series data
 """
 from __future__ import division
+from itertools import islice
 
 # pylint: disable=E1101,E1103
 # pylint: disable=W0703,W0622,W0613,W0201
@@ -118,11 +119,13 @@ class Series(generic.NDFrame):
     dtype : numpy.dtype or None
         If None, dtype will be inferred
     copy : boolean, default False, copy input data
+    count : int or None, number of values to read from a generator.
+        If None, reads the whole generator
     """
     _metadata = ['name']
 
     def __init__(self, data=None, index=None, dtype=None, name=None,
-                 copy=False, fastpath=False):
+                 copy=False, count=None, fastpath=False):
 
         # we are called internally, so short-circuit
         if fastpath:
@@ -192,7 +195,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
                     name = data.name
                 data = np.asarray(data)
             elif isinstance(data, types.GeneratorType):
-                data = list(data)
+                data = list(islice(data, count))
             elif isinstance(data, (set, frozenset)):
                 raise TypeError("{0!r} type is unordered"
                                 "".format(data.__class__.__name__))

diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -2791,6 +2791,15 @@ def test_constructor_generator(self):
         expected = DataFrame({ 0 : range(10), 1 : 'a' })
         assert_frame_equal(result, expected, check_dtype=False)
 
+    def test_constructor_generator_count_limit(self):
+        generator_length = 10
+        expected_length = 5
+
+        # count only applies when data is a generator, not a collection
+        gen = ([ i, 'a'] for i in range(generator_length))
+        result = DataFrame(gen, count=expected_length)
+        self.assertEqual(len(result), expected_length)
+
     def test_constructor_list_of_dicts(self):
         data = [OrderedDict([['a', 1.5], ['b', 3], ['c', 4], ['d', 6]]),
                 OrderedDict([['a', 1.5], ['b', 3], ['d', 6]]),
@@ -3820,6 +3829,14 @@ def list_generator(length):
         result = DataFrame.from_records(generator, columns=columns_names)
         assert_frame_equal(result, expected)
 
+    def test_from_records_generator_count_limit(self):
+        def generator(length):
+            for i in range(length):
+                yield (i, i/2)
+        expected_length = 5
+        df = DataFrame.from_records(generator(10), count=expected_length)
+        self.assertEqual(len(df), expected_length)
+
     def test_from_records_columns_not_modified(self):
         tuples = [(1, 2, 3),
                   (1, 2, 3),

diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
@@ -403,6 +403,14 @@ def test_constructor_generator(self):
         exp.index = lrange(10, 20)
         assert_series_equal(result, exp)
 
+    def test_constructor_generator_count_limit(self):
+        generator_length = 10
+        expected_length = 5
+        gen = (i for i in range(generator_length))
+
+        result = Series(gen, count=expected_length)
+        self.assertEqual(len(result), expected_length)
+
     def test_constructor_categorical(self):
         cat = pd.Categorical([0, 1, 2, 0, 1, 2], ['a', 'b', 'c'])
         res = Series(cat)