From da48362d266ce96bb569fef8eac9aa9de95cc52b Mon Sep 17 00:00:00 2001 From: theroggy Date: Mon, 6 Jun 2022 02:48:04 +0200 Subject: [PATCH 1/6] Add support to write nan values in object columns --- pyogrio/_io.pyx | 19 +++++++++++-------- pyogrio/tests/test_geopandas_io.py | 23 +++++++++++++++++++++++ 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 30690c63..fdd27138 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -14,6 +14,7 @@ import warnings from libc.stdint cimport uint8_t from libc.stdlib cimport malloc, free from libc.string cimport strlen +from libc.math cimport isnan cimport cython import numpy as np @@ -1364,17 +1365,19 @@ def ogr_write(str path, str layer, str driver, geometry, field_data, fields, elif field_type == OFTString: # TODO: encode string using approach from _get_internal_encoding which checks layer capabilities - try: - # this will fail for strings mixed with nans - value_b = field_value.encode("UTF-8") + if isinstance(field_value, float) and isnan(field_value): + OGR_F_SetFieldNull(ogr_feature, field_idx) + else: - except AttributeError: - raise ValueError(f"Could not encode value '{field_value}' in field '{fields[field_idx]}' to string") + try: + value_b = field_value.encode("UTF-8") + OGR_F_SetFieldString(ogr_feature, field_idx, value_b) - except Exception: - raise + except AttributeError: + raise ValueError(f"Could not encode value '{field_value}' in field '{fields[field_idx]}' to string") - OGR_F_SetFieldString(ogr_feature, field_idx, value_b) + except Exception: + raise elif field_type == OFTInteger: OGR_F_SetFieldInteger(ogr_feature, field_idx, field_value) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index 6d1bbf76..b7d04924 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -704,3 +704,26 @@ def test_custom_crs_io(tmpdir, naturalearth_lowres_all_ext): assert crs["lat_2"] == 51.5 assert crs["lon_0"] == 4.3 assert df.crs.equals(expected.crs) + + +@pytest.mark.parametrize("ext", ALL_EXTS) +def test_write_read_null(tmp_path, ext): + from shapely.geometry import Point + + output_path = tmp_path / f"test_write_nan.{ext}" + geom = Point(0, 0) + test_data = { + "geometry": [geom, geom, geom], + "float64": [1.0, None, np.nan], + "object_str": ["test", None, np.nan], + } + test_gdf = gp.GeoDataFrame(test_data, crs="epsg:31370") + write_dataframe(test_gdf, output_path) + result_gdf = read_dataframe(output_path) + assert len(test_gdf) == len(result_gdf) + assert result_gdf["float64"][0] == 1.0 + assert pd.isna(result_gdf["float64"][1]) + assert pd.isna(result_gdf["float64"][2]) + assert result_gdf["object_str"][0] == "test" + assert result_gdf["object_str"][1] is None + assert result_gdf["object_str"][2] is None From ce1b68a56034fbd24a40f584751a7362d750ba20 Mon Sep 17 00:00:00 2001 From: theroggy Date: Mon, 6 Jun 2022 02:57:30 +0200 Subject: [PATCH 2/6] Add to changelog --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 530014aa..998f7ce7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -22,6 +22,7 @@ - Use certifi to set `GDAL_CURL_CA_BUNDLE` / `PROJ_CURL_CA_BUNDLE` defaults (#97) - automatically detect driver for `.geojson`, `.geojsonl` and `.geojsons` files (#101) - read DateTime fields with millisecond accuracy (#111) +- support writing object columns with np.nan values (#60) ### Breaking changes From 530b5d4c68a3380d2a43caa3a9fe45b3ca717aa3 Mon Sep 17 00:00:00 2001 From: theroggy Date: Mon, 6 Jun 2022 02:59:32 +0200 Subject: [PATCH 3/6] Consistent newlines --- pyogrio/_io.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index fdd27138..76a1e6c4 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1367,8 +1367,8 @@ def ogr_write(str path, str layer, str driver, geometry, field_data, fields, # TODO: encode string using approach from _get_internal_encoding which checks layer capabilities if isinstance(field_value, float) and isnan(field_value): OGR_F_SetFieldNull(ogr_feature, field_idx) - else: + else: try: value_b = field_value.encode("UTF-8") OGR_F_SetFieldString(ogr_feature, field_idx, value_b) From 744a031e138e4ae89eacc59e4f183667025caa45 Mon Sep 17 00:00:00 2001 From: theroggy Date: Mon, 6 Jun 2022 18:34:11 +0200 Subject: [PATCH 4/6] Move is None check within string/object handling --- pyogrio/_io.pyx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 76a1e6c4..bf17c194 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1360,12 +1360,12 @@ def ogr_write(str path, str layer, str driver, geometry, field_data, fields, field_value = field_data[field_idx][i] field_type = field_types[field_idx][0] - if field_value is None: - OGR_F_SetFieldNull(ogr_feature, field_idx) - - elif field_type == OFTString: + if field_type == OFTString: # TODO: encode string using approach from _get_internal_encoding which checks layer capabilities - if isinstance(field_value, float) and isnan(field_value): + if ( + field_value is None + or (isinstance(field_value, float) and isnan(field_value)) + ): OGR_F_SetFieldNull(ogr_feature, field_idx) else: From 6c38520d77d1d98445922e01ad6de4aaf8f41acc Mon Sep 17 00:00:00 2001 From: theroggy Date: Mon, 6 Jun 2022 18:44:01 +0200 Subject: [PATCH 5/6] Only test on gpkg --- pyogrio/tests/test_geopandas_io.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index b7d04924..c13ec78a 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -706,11 +706,10 @@ def test_custom_crs_io(tmpdir, naturalearth_lowres_all_ext): assert df.crs.equals(expected.crs) -@pytest.mark.parametrize("ext", ALL_EXTS) -def test_write_read_null(tmp_path, ext): +def test_write_read_null(tmp_path): from shapely.geometry import Point - output_path = tmp_path / f"test_write_nan.{ext}" + output_path = tmp_path / f"test_write_nan.gpkg" geom = Point(0, 0) test_data = { "geometry": [geom, geom, geom], From cb28a2e379874bf1f5614b309527bbf24236ea96 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Mon, 6 Jun 2022 18:48:58 +0200 Subject: [PATCH 6/6] Update CHANGES.md Co-authored-by: Brendan Ward --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 998f7ce7..c8a92a99 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -22,7 +22,7 @@ - Use certifi to set `GDAL_CURL_CA_BUNDLE` / `PROJ_CURL_CA_BUNDLE` defaults (#97) - automatically detect driver for `.geojson`, `.geojsonl` and `.geojsons` files (#101) - read DateTime fields with millisecond accuracy (#111) -- support writing object columns with np.nan values (#60) +- support writing object columns with np.nan values (#118) ### Breaking changes