diff --git a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py index eca91ce51f9..a08d5431861 100644 --- a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py +++ b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cudf.tests.utils import assert_eq import io -import cudf + import fastavro import pytest +import cudf +from cudf.tests.utils import assert_eq + + def cudf_from_avro_util(schema, records): schema = fastavro.parse_schema(schema) buffer = io.BytesIO() @@ -25,103 +28,119 @@ def cudf_from_avro_util(schema, records): buffer.seek(0) return cudf.read_avro(buffer) + avro_type_params = [ - ('boolean', 'bool'), - ('int', 'int32'), - ('long', 'int64'), - ('float', 'float32'), - ('double', 'float64'), - ('bytes', 'str'), - ('string', 'str'), + ("boolean", "bool"), + ("int", "int32"), + ("long", "int64"), + ("float", "float32"), + ("double", "float64"), + ("bytes", "str"), + ("string", "str"), ] + @pytest.mark.parametrize("avro_type, expected_dtype", avro_type_params) -@pytest.mark.parametrize("namespace", [None, 'root_ns']) +@pytest.mark.parametrize("namespace", [None, "root_ns"]) @pytest.mark.parametrize("nullable", [True, False]) -def test_can_detect_dtype_from_avro_type(avro_type, expected_dtype, namespace, nullable): - avro_type = avro_type if not nullable else ['null', avro_type] - - schema = fastavro.parse_schema({ - 'type': 'record', - 'name': 'test', - 'namespace': namespace, - 'fields': [ {'name': 'prop', 'type': avro_type } ], - }) +def test_can_detect_dtype_from_avro_type( + avro_type, expected_dtype, namespace, nullable +): + avro_type = avro_type if not nullable else ["null", avro_type] + + schema = fastavro.parse_schema( + { + "type": "record", + "name": "test", + "namespace": namespace, + "fields": [{"name": "prop", "type": avro_type}], + } + ) actual = cudf_from_avro_util(schema, []) - expected = cudf.DataFrame({ - 'prop': cudf.Series(None, None, expected_dtype) - }) + expected = cudf.DataFrame( + {"prop": cudf.Series(None, None, expected_dtype)} + ) assert_eq(expected, actual) + @pytest.mark.parametrize("avro_type, expected_dtype", avro_type_params) -@pytest.mark.parametrize("namespace", [None, 'root_ns']) +@pytest.mark.parametrize("namespace", [None, "root_ns"]) @pytest.mark.parametrize("nullable", [True, False]) -def test_can_detect_dtype_from_avro_type_nested(avro_type, expected_dtype, namespace, nullable): - avro_type = avro_type if not nullable else ['null', avro_type] +def test_can_detect_dtype_from_avro_type_nested( + avro_type, expected_dtype, namespace, nullable +): + avro_type = avro_type if not nullable else ["null", avro_type] schema_leaf = { - 'name': 'leaf', - 'type': 'record', - 'fields': [ { 'name': 'prop3', 'type': avro_type } ] + "name": "leaf", + "type": "record", + "fields": [{"name": "prop3", "type": avro_type}], } schema_child = { - 'name': 'child', - 'type': 'record', - 'fields': [ { 'name': 'prop2', 'type': schema_leaf } ] + "name": "child", + "type": "record", + "fields": [{"name": "prop2", "type": schema_leaf}], } schema_root = { - 'name': 'root', - 'type': 'record', - 'namespace': namespace, - 'fields': [ { 'name': 'prop1', 'type': schema_child } ], + "name": "root", + "type": "record", + "namespace": namespace, + "fields": [{"name": "prop1", "type": schema_child}], } actual = cudf_from_avro_util(schema_root, []) - col_name = "{ns}child.{ns}leaf.prop3".format(ns='' if namespace is None else namespace + '.') + col_name = "{ns}child.{ns}leaf.prop3".format( + ns="" if namespace is None else namespace + "." + ) - expected = cudf.DataFrame({ - col_name: cudf.Series(None, None, expected_dtype) - }) + expected = cudf.DataFrame( + {col_name: cudf.Series(None, None, expected_dtype)} + ) assert_eq(expected, actual) -@pytest.mark.parametrize("avro_type, cudf_type, avro_val, cudf_val", [ - ('boolean', 'bool', True, True), - ('boolean', 'bool', False, False), - ('int', 'int32', 1234, 1234), - ('long', 'int64', 1234, 1234), - ('float', 'float32', 12.34, 12.34), - ('double', 'float64', 12.34, 12.34), - ('string', 'str', 'hey', 'hey'), - # ('bytes', 'str', 'hey', 'hey'), -]) + +@pytest.mark.parametrize( + "avro_type, cudf_type, avro_val, cudf_val", + [ + ("boolean", "bool", True, True), + ("boolean", "bool", False, False), + ("int", "int32", 1234, 1234), + ("long", "int64", 1234, 1234), + ("float", "float32", 12.34, 12.34), + ("double", "float64", 12.34, 12.34), + ("string", "str", "hey", "hey"), + # ('bytes', 'str', 'hey', 'hey'), + ], +) def test_can_parse_values(avro_type, cudf_type, avro_val, cudf_val): schema_root = { - 'name': 'root', - 'type': 'record', - 'fields': [ { 'name': 'prop', 'type': ['null', avro_type] } ], + "name": "root", + "type": "record", + "fields": [{"name": "prop", "type": ["null", avro_type]}], } records = [ - {u'prop': avro_val}, - {u'prop': None}, + {"prop": avro_val}, + {"prop": None}, ] actual = cudf_from_avro_util(schema_root, records) - expected = cudf.DataFrame({ - 'prop': cudf.Series(data=[cudf_val, None], dtype=cudf_type) - }) + expected = cudf.DataFrame( + {"prop": cudf.Series(data=[cudf_val, None], dtype=cudf_type)} + ) assert_eq(expected, actual) + # @pytest.mark.parametrize("avro_type, cudf_type", avro_type_params) # def test_can_parse_single_null(avro_type, cudf_type):