From 50019872d61f5bfb8847bb8778931dd4252eceff Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Wed, 1 Nov 2023 19:57:21 +0100 Subject: [PATCH 1/4] error when writing a dataframe with a date column and non-consecutive index values --- pyogrio/geopandas.py | 1 + pyogrio/tests/test_geopandas_io.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py index 9aa6f8e6..c65ddde5 100644 --- a/pyogrio/geopandas.py +++ b/pyogrio/geopandas.py @@ -439,6 +439,7 @@ def write_dataframe( # TODO: may need to fill in pd.NA, etc field_data = [] field_mask = [] + df = df.reset_index(drop=True) # dict[str, np.array(int)] special case for dt-tz fields gdal_tz_offsets = {} for name in fields: diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index a69acec9..76563759 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -188,6 +188,8 @@ def test_read_datetime(test_fgdb_vsi, use_arrow): def test_read_datetime_tz(test_datetime_tz, tmp_path): df = read_dataframe(test_datetime_tz) + # Make the index non-consecutive to test this case as well + df = df.set_index(np.array([0, 2])) raw_expected = ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"] if PANDAS_GE_20: @@ -195,7 +197,7 @@ def test_read_datetime_tz(test_datetime_tz, tmp_path): else: expected = pd.to_datetime(raw_expected) expected = pd.Series(expected, name="datetime_col") - assert_series_equal(df.datetime_col, expected) + assert_series_equal(df.datetime_col, expected, check_index=False) # test write and read round trips fpath = tmp_path / "test.gpkg" write_dataframe(df, fpath) From 45081067a067de8411e92a6fee48c26eb91cee15 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Wed, 1 Nov 2023 20:01:12 +0100 Subject: [PATCH 2/4] Update CHANGES.md --- CHANGES.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index f911f673..0e5556ee 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,12 @@ # CHANGELOG +## 0.7.3 (???) + +### Bug fixes + +- Fix error in `write_dataframe` if input has a date column and + non-consecutive index values (#325). + ## 0.7.2 (2023-10-30) ### Bug fixes From 56dd49fcde247a4dd2244ebadc471a87875093eb Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Wed, 1 Nov 2023 20:03:23 +0100 Subject: [PATCH 3/4] Update test_geopandas_io.py --- pyogrio/tests/test_geopandas_io.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index 76563759..9b20c392 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -188,7 +188,8 @@ def test_read_datetime(test_fgdb_vsi, use_arrow): def test_read_datetime_tz(test_datetime_tz, tmp_path): df = read_dataframe(test_datetime_tz) - # Make the index non-consecutive to test this case as well + # Make the index non-consecutive to test this case as well. Added for issue + # https://github.com/geopandas/pyogrio/issues/324 df = df.set_index(np.array([0, 2])) raw_expected = ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"] From af200e276f4229dff50d97d21373e5915482e16c Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Wed, 1 Nov 2023 23:32:11 +0100 Subject: [PATCH 4/4] Cleaner solution as suggested by Brendan --- pyogrio/geopandas.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py index c65ddde5..7cc21db7 100644 --- a/pyogrio/geopandas.py +++ b/pyogrio/geopandas.py @@ -439,7 +439,6 @@ def write_dataframe( # TODO: may need to fill in pd.NA, etc field_data = [] field_mask = [] - df = df.reset_index(drop=True) # dict[str, np.array(int)] special case for dt-tz fields gdal_tz_offsets = {} for name in fields: @@ -457,7 +456,7 @@ def write_dataframe( # https://gdal.org/development/rfc/rfc56_millisecond_precision.html#core-changes # Convert each row offset to a signed multiple of 15m and add to GMT value gdal_offset_representation = tz_offset // pd.Timedelta("15m") + 100 - gdal_tz_offsets[name] = gdal_offset_representation + gdal_tz_offsets[name] = gdal_offset_representation.values else: values = col.values if isinstance(values, pd.api.extensions.ExtensionArray):