Skip to content

Commit

Permalink
Update comments (#15)
Browse files Browse the repository at this point in the history
* add write protection to ParamDict

* update

* add more functions to pyarrow

* update

* add comments

* bump up version
  • Loading branch information
Han Wang authored May 1, 2020
1 parent 02fde15 commit 10af8a0
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 7 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from setuptools import setup, find_packages

VERSION = "0.1.7"
VERSION = "0.1.8"

with open("README.md") as f:
LONG_DESCRIPTION = f.read()
Expand Down
10 changes: 5 additions & 5 deletions tests/utils/test_pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
expression_to_schema, get_eq_func,
is_supported, pandas_to_schema,
schema_to_expression, to_pa_datatype,
validate_column_name, SchemadDataPartitioner)
validate_column_name, SchemaedDataPartitioner)


def test_validate_column_name():
Expand Down Expand Up @@ -164,12 +164,12 @@ def test_get_eq_func():
assert get_eq_func(t)(None, None)


def test_schemad_data_partitioner():
p0 = SchemadDataPartitioner(schema=expression_to_schema("a:int,b:int,c:int"),
def test_schemaed_data_partitioner():
p0 = SchemaedDataPartitioner(schema=expression_to_schema("a:int,b:int,c:int"),
key_positions=[2, 0], row_limit=0)
p1 = SchemadDataPartitioner(schema=expression_to_schema("a:int,b:int,c:int"),
p1 = SchemaedDataPartitioner(schema=expression_to_schema("a:int,b:int,c:int"),
key_positions=[2, 0], row_limit=1)
p2 = SchemadDataPartitioner(schema=expression_to_schema("a:int,b:int,c:int"),
p2 = SchemaedDataPartitioner(schema=expression_to_schema("a:int,b:int,c:int"),
key_positions=[2, 0], row_limit=2)
data = [[0, 0, 0], [0, 1, 0], [0, 2, 0], [1, 0, 0]]
_test_partition(p0, data, "0,0,[0,1,2];1,0,[3]")
Expand Down
17 changes: 16 additions & 1 deletion triad/utils/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,17 @@ def get_eq_func(data_type: pa.DataType) -> Callable[[Any, Any], bool]:
return _general_eq


class SchemadDataPartitioner(object):
class SchemaedDataPartitioner(object):
"""Partitioner for stream of array like data with given schema.
It uses :func"`~triad.utils.iter.Slicer` to partition the stream
:param schema: the schema of the data stream to process
:param key_positions: positions of partition keys on `schema`
:param sizer: the function to get size of an item
:param row_limit: max row for each slice, defaults to None
:param size_limit: max byte size for each slice, defaults to None
"""

def __init__(
self,
schema: pa.Schema,
Expand All @@ -251,6 +261,11 @@ def __init__(
def partition(
self, data: Iterable[Any]
) -> Iterable[Tuple[int, int, EmptyAwareIterable[Any]]]:
"""Partition the given data stream
:param data: iterable of array like objects
:yield: iterable of <partition_no, slice_no, slice iterable> tuple
"""
self._hitting_boundary = False
slice_no = 0
partition_no = 0
Expand Down

0 comments on commit 10af8a0

Please sign in to comment.