Update comments (#15)

* add write protection to ParamDict * update * add more functions to pyarrow * update * add comments * bump up version
fugue-project · May 1, 2020 · 10af8a0 · 10af8a0
1 parent 02fde15
commit 10af8a0
Show file tree

Hide file tree

Showing 3 changed files with 22 additions and 7 deletions.
diff --git a/setup.py b/setup.py
@@ -1,6 +1,6 @@
 from setuptools import setup, find_packages
 
-VERSION = "0.1.7"
+VERSION = "0.1.8"
 
 with open("README.md") as f:
     LONG_DESCRIPTION = f.read()

diff --git a/tests/utils/test_pyarrow.py b/tests/utils/test_pyarrow.py
@@ -8,7 +8,7 @@
                                  expression_to_schema, get_eq_func,
                                  is_supported, pandas_to_schema,
                                  schema_to_expression, to_pa_datatype,
-                                 validate_column_name, SchemadDataPartitioner)
+                                 validate_column_name, SchemaedDataPartitioner)
 
 
 def test_validate_column_name():
@@ -164,12 +164,12 @@ def test_get_eq_func():
     assert get_eq_func(t)(None, None)
 
 
-def test_schemad_data_partitioner():
-    p0 = SchemadDataPartitioner(schema=expression_to_schema("a:int,b:int,c:int"),
+def test_schemaed_data_partitioner():
+    p0 = SchemaedDataPartitioner(schema=expression_to_schema("a:int,b:int,c:int"),
                                 key_positions=[2, 0], row_limit=0)
-    p1 = SchemadDataPartitioner(schema=expression_to_schema("a:int,b:int,c:int"),
+    p1 = SchemaedDataPartitioner(schema=expression_to_schema("a:int,b:int,c:int"),
                                 key_positions=[2, 0], row_limit=1)
-    p2 = SchemadDataPartitioner(schema=expression_to_schema("a:int,b:int,c:int"),
+    p2 = SchemaedDataPartitioner(schema=expression_to_schema("a:int,b:int,c:int"),
                                 key_positions=[2, 0], row_limit=2)
     data = [[0, 0, 0], [0, 1, 0], [0, 2, 0], [1, 0, 0]]
     _test_partition(p0, data, "0,0,[0,1,2];1,0,[3]")

diff --git a/triad/utils/pyarrow.py b/triad/utils/pyarrow.py
@@ -227,7 +227,17 @@ def get_eq_func(data_type: pa.DataType) -> Callable[[Any, Any], bool]:
     return _general_eq
 
 
-class SchemadDataPartitioner(object):
+class SchemaedDataPartitioner(object):
+    """Partitioner for stream of array like data with given schema.
+    It uses :func"`~triad.utils.iter.Slicer` to partition the stream
+
+    :param schema: the schema of the data stream to process
+    :param key_positions: positions of partition keys on `schema`
+    :param sizer: the function to get size of an item
+    :param row_limit: max row for each slice, defaults to None
+    :param size_limit: max byte size for each slice, defaults to None
+    """
+
     def __init__(
         self,
         schema: pa.Schema,
@@ -251,6 +261,11 @@ def __init__(
     def partition(
         self, data: Iterable[Any]
     ) -> Iterable[Tuple[int, int, EmptyAwareIterable[Any]]]:
+        """Partition the given data stream
+
+        :param data: iterable of array like objects
+        :yield: iterable of <partition_no, slice_no, slice iterable> tuple
+        """
         self._hitting_boundary = False
         slice_no = 0
         partition_no = 0