From e075091377a37c4341bd0d4471ac950b161b8676 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sun, 7 May 2023 22:07:49 +1000
Subject: [PATCH 01/51] minimal working pandas layer without timezones

---
 pyogrio/_io.pyx      | 34 +++++++++++++++++++++-------------
 pyogrio/geopandas.py |  6 ++++++
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx
index 9065904c..5f6c8410 100644
--- a/pyogrio/_io.pyx
+++ b/pyogrio/_io.pyx
@@ -657,22 +657,26 @@ cdef process_fields(
             data[i] = bin_value[:ret_length]
 
         elif field_type == OFTDateTime or field_type == OFTDate:
-            success = OGR_F_GetFieldAsDateTimeEx(
-                ogr_feature, field_index, &year, &month, &day, &hour, &minute, &fsecond, &timezone)
+            # defer datetime parsing to pandas layer (TODO this would be a regression in the numpy, implement datetime_as_string toggle as suggested)
+            value = get_string(OGR_F_GetFieldAsString(ogr_feature, field_index), encoding=encoding)
+            data[i] = value
 
-            ms, ss = math.modf(fsecond)
-            second = int(ss)
-            # fsecond has millisecond accuracy
-            microsecond = round(ms * 1000) * 1000
+            # success = OGR_F_GetFieldAsDateTimeEx(
+            #     ogr_feature, field_index, &year, &month, &day, &hour, &minute, &fsecond, &timezone)
 
-            if not success:
-                data[i] = np.datetime64('NaT')
+            # ms, ss = math.modf(fsecond)
+            # second = int(ss)
+            # # fsecond has millisecond accuracy
+            # microsecond = round(ms * 1000) * 1000
 
-            elif field_type == OFTDate:
-                data[i] = datetime.date(year, month, day).isoformat()
+            # if not success:
+            #     data[i] = np.datetime64('NaT')
 
-            elif field_type == OFTDateTime:
-                data[i] = datetime.datetime(year, month, day, hour, minute, second, microsecond).isoformat()
+            # elif field_type == OFTDate:
+            #     data[i] = datetime.date(year, month, day).isoformat()
+
+            # elif field_type == OFTDateTime:
+            #     data[i] = datetime.datetime(year, month, day, hour, minute, second, microsecond).isoformat()
 
 
 @cython.boundscheck(False)  # Deactivate bounds checking
@@ -718,8 +722,11 @@ cdef get_features(
 
     field_data = [
         np.empty(shape=(num_features, ),
-        dtype=fields[field_index,3]) for field_index in range(n_fields)
+        # TODO cython has walrus?
+        dtype=(fields[field_index,3] if not fields[field_index,3].startswith("datetime") else "object")) for field_index in range(n_fields)
     ]
+    types = [(fields[field_index,2],fields[field_index,3]) for field_index in range(n_fields)]
+    print("types are", types)
 
     field_data_view = [field_data[field_index][:] for field_index in range(n_fields)]
     i = 0
@@ -1058,6 +1065,7 @@ def ogr_read(
             'crs': crs,
             'encoding': encoding,
             'fields': fields[:,2], # return only names
+            'dtypes':fields[:,3],
             'geometry_type': geometry_type,
         }
 
diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index a69ddb14..ea9e52d2 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -165,6 +165,7 @@ def read_dataframe(
     )
 
     if use_arrow:
+        # TODO what happens to datetimes with arrow?
         meta, table = result
         df = table.to_pandas()
         geometry_name = meta["geometry_name"] or "wkb_geometry"
@@ -175,6 +176,8 @@ def read_dataframe(
             return df
 
     meta, index, geometry, field_data = result
+    print("meta is")
+    print(meta)
 
     columns = meta["fields"].tolist()
     data = {columns[i]: field_data[i] for i in range(len(columns))}
@@ -184,6 +187,9 @@ def read_dataframe(
         index = None
 
     df = pd.DataFrame(data, columns=columns, index=index)
+    for dtype, c in zip(meta["dtypes"], df.columns):
+        if dtype.startswith("datetime"):
+            df[c] = pd.to_datetime(df[c], format="%Y/%m/%d %H:%M:%S%z")
 
     if geometry is None or not read_geometry:
         return df

From 3df7936109b64b39a9d8af75d0c5b8abbb0cd75c Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sun, 7 May 2023 22:25:23 +1000
Subject: [PATCH 02/51] implement datetime_as_string toggle to get numpy layer
 working

---
 pyogrio/_io.pyx      | 58 +++++++++++++++++++++++++-------------------
 pyogrio/geopandas.py |  4 +++
 pyogrio/raw.py       |  6 +++++
 3 files changed, 43 insertions(+), 25 deletions(-)

diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx
index 5f6c8410..fe311792 100644
--- a/pyogrio/_io.pyx
+++ b/pyogrio/_io.pyx
@@ -599,7 +599,8 @@ cdef process_fields(
     object field_data_view,
     object field_indexes,
     object field_ogr_types,
-    encoding
+    encoding,
+    bint datetime_as_string
 ):
     cdef int j
     cdef int success
@@ -657,26 +658,27 @@ cdef process_fields(
             data[i] = bin_value[:ret_length]
 
         elif field_type == OFTDateTime or field_type == OFTDate:
-            # defer datetime parsing to pandas layer (TODO this would be a regression in the numpy, implement datetime_as_string toggle as suggested)
-            value = get_string(OGR_F_GetFieldAsString(ogr_feature, field_index), encoding=encoding)
-            data[i] = value
-
-            # success = OGR_F_GetFieldAsDateTimeEx(
-            #     ogr_feature, field_index, &year, &month, &day, &hour, &minute, &fsecond, &timezone)
+            # defer datetime parsing to pandas layer 
+            if datetime_as_string:
+                value = get_string(OGR_F_GetFieldAsString(ogr_feature, field_index), encoding=encoding)
+                data[i] = value
+            else:
+                success = OGR_F_GetFieldAsDateTimeEx(
+                    ogr_feature, field_index, &year, &month, &day, &hour, &minute, &fsecond, &timezone)
 
-            # ms, ss = math.modf(fsecond)
-            # second = int(ss)
-            # # fsecond has millisecond accuracy
-            # microsecond = round(ms * 1000) * 1000
+                ms, ss = math.modf(fsecond)
+                second = int(ss)
+                # fsecond has millisecond accuracy
+                microsecond = round(ms * 1000) * 1000
 
-            # if not success:
-            #     data[i] = np.datetime64('NaT')
+                if not success:
+                    data[i] = np.datetime64('NaT')
 
-            # elif field_type == OFTDate:
-            #     data[i] = datetime.date(year, month, day).isoformat()
+                elif field_type == OFTDate:
+                    data[i] = datetime.date(year, month, day).isoformat()
 
-            # elif field_type == OFTDateTime:
-            #     data[i] = datetime.datetime(year, month, day, hour, minute, second, microsecond).isoformat()
+                elif field_type == OFTDateTime:
+                    data[i] = datetime.datetime(year, month, day, hour, minute, second, microsecond).isoformat()
 
 
 @cython.boundscheck(False)  # Deactivate bounds checking
@@ -689,7 +691,8 @@ cdef get_features(
     uint8_t force_2d,
     int skip_features,
     int num_features,
-    uint8_t return_fids
+    uint8_t return_fids,
+    bint datetime_as_string
 ):
 
     cdef OGRFeatureH ogr_feature = NULL
@@ -723,7 +726,7 @@ cdef get_features(
     field_data = [
         np.empty(shape=(num_features, ),
         # TODO cython has walrus?
-        dtype=(fields[field_index,3] if not fields[field_index,3].startswith("datetime") else "object")) for field_index in range(n_fields)
+        dtype=("object" if datetime_as_string and fields[field_index,3].startswith("datetime") else fields[field_index,3])) for field_index in range(n_fields)
     ]
     types = [(fields[field_index,2],fields[field_index,3]) for field_index in range(n_fields)]
     print("types are", types)
@@ -765,7 +768,7 @@ cdef get_features(
 
             process_fields(
                 ogr_feature, i, n_fields, field_data, field_data_view,
-                field_indexes, field_ogr_types, encoding
+                field_indexes, field_ogr_types, encoding, datetime_as_string
             )
             i += 1
         finally:
@@ -795,7 +798,8 @@ cdef get_features_by_fid(
     object[:,:] fields,
     encoding,
     uint8_t read_geometry,
-    uint8_t force_2d
+    uint8_t force_2d,
+    bint datetime_as_string
 ):
 
     cdef OGRFeatureH ogr_feature = NULL
@@ -818,7 +822,7 @@ cdef get_features_by_fid(
     n_fields = fields.shape[0]
     field_indexes = fields[:,0]
     field_ogr_types = fields[:,1]
-
+    # TODO missing datetime dtype hack on this path?
     field_data = [
         np.empty(shape=(count, ),
         dtype=fields[field_index,3]) for field_index in range(n_fields)
@@ -844,7 +848,7 @@ cdef get_features_by_fid(
 
             process_fields(
                 ogr_feature, i, n_fields, field_data, field_data_view,
-                field_indexes, field_ogr_types, encoding
+                field_indexes, field_ogr_types, encoding, datetime_as_string
             )
         finally:
             if ogr_feature != NULL:
@@ -946,7 +950,9 @@ def ogr_read(
     object fids=None,
     str sql=None,
     str sql_dialect=None,
-    int return_fids=False):
+    int return_fids=False,
+    bint datetime_as_string=False
+    ):
 
     cdef int err = 0
     cdef const char *path_c = NULL
@@ -1029,6 +1035,7 @@ def ogr_read(
                 encoding,
                 read_geometry=read_geometry and geometry_type is not None,
                 force_2d=force_2d,
+                datetime_as_string=datetime_as_string
             )
 
             # bypass reading fids since these should match fids used for read
@@ -1058,7 +1065,8 @@ def ogr_read(
                 force_2d=force_2d,
                 skip_features=skip_features,
                 num_features=num_features,
-                return_fids=return_fids
+                return_fids=return_fids,
+                datetime_as_string=datetime_as_string
             )
 
         meta = {
diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index ea9e52d2..c996cb3e 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -146,6 +146,10 @@ def read_dataframe(
     path_or_buffer = _stringify_path(path_or_buffer)
 
     read_func = read_arrow if use_arrow else read
+    if not read_arrow and "datetime_as_string" not in kwargs:
+        kwargs["datetime_as_string"] = True
+        # TODO work out what to do with the arrow api
+
     result = read_func(
         path_or_buffer,
         layer=layer,
diff --git a/pyogrio/raw.py b/pyogrio/raw.py
index 27bc5ed4..30b73931 100644
--- a/pyogrio/raw.py
+++ b/pyogrio/raw.py
@@ -53,6 +53,7 @@ def read(
     sql=None,
     sql_dialect=None,
     return_fids=False,
+    datetime_as_string=False,
     **kwargs,
 ):
     """Read OGR data source into numpy arrays.
@@ -108,6 +109,10 @@ def read(
         number of features usings FIDs is also driver specific.
     return_fids : bool, optional (default: False)
         If True, will return the FIDs of the feature that were read.
+    datetime_as_string : bool, optional (default: False)
+        If True, will return datetime dtypes as detected by GDAL as a string
+        array, instead of a datetime64 array.
+
     **kwargs
         Additional driver-specific dataset open options passed to OGR.  Invalid
         options will trigger a warning.
@@ -150,6 +155,7 @@ def read(
             sql_dialect=sql_dialect,
             return_fids=return_fids,
             dataset_kwargs=dataset_kwargs,
+            datetime_as_string=datetime_as_string,
         )
     finally:
         if buffer is not None:

From d68b47301113749c554854ed6aff0b9c850ddfc1 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Mon, 8 May 2023 20:02:46 +1000
Subject: [PATCH 03/51] make tests pass

---
 pyogrio/tests/conftest.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyogrio/tests/conftest.py b/pyogrio/tests/conftest.py
index 6f69fd92..44b66012 100644
--- a/pyogrio/tests/conftest.py
+++ b/pyogrio/tests/conftest.py
@@ -43,6 +43,7 @@ def prepare_testfile(testfile_path, dst_dir, ext):
         # For .gpkg, spatial_index=False to avoid the rows being reordered
         meta["spatial_index"] = False
         meta["geometry_type"] = "MultiPolygon"
+    meta.pop("dtypes")
 
     write(dst_path, geometry, field_data, **meta)
     return dst_path

From 9aa5a8ca028c6c0264f522a0e73703dac221cd91 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Mon, 8 May 2023 20:16:04 +1000
Subject: [PATCH 04/51] add tests showing existing behaviour no tz

---
 pyogrio/tests/conftest.py          |  5 +++++
 pyogrio/tests/test_geopandas_io.py | 14 ++++++++++++++
 pyogrio/tests/test_raw_io.py       | 14 ++++++++++++++
 3 files changed, 33 insertions(+)

diff --git a/pyogrio/tests/conftest.py b/pyogrio/tests/conftest.py
index 44b66012..ef47fc96 100644
--- a/pyogrio/tests/conftest.py
+++ b/pyogrio/tests/conftest.py
@@ -98,3 +98,8 @@ def test_ogr_types_list():
 @pytest.fixture(scope="session")
 def test_datetime():
     return _data_dir / "test_datetime.geojson"
+
+
+@pytest.fixture(scope="session")
+def test_datetime_tz():
+    return _data_dir / "test_datetime_tz.geojson"
diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 8e9fd23a..9a14d1cf 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -139,6 +139,20 @@ def test_read_datetime(test_fgdb_vsi):
         assert df.SURVEY_DAT.dtype.name == "datetime64[ns]"
 
 
+def test_read_datetime_tz(test_datetime_tz):
+    df = read_dataframe(test_datetime_tz)
+    if Version(pd.__version__) >= Version("2.0.0"):
+        # starting with pandas 2.0, it preserves the passed datetime resolution
+        assert df.col.dtype.name == "datetime64[ms]"
+    else:
+        assert df.col.dtype.name == "datetime64[ns]"
+
+    df.col = df.col.astype("datetime64[ms]")
+    print(df.col)
+    print(df.col.dtype)
+    # TODO assertion
+
+
 def test_read_null_values(test_fgdb_vsi):
     df = read_dataframe(test_fgdb_vsi, read_geometry=False)
 
diff --git a/pyogrio/tests/test_raw_io.py b/pyogrio/tests/test_raw_io.py
index cb342618..5498bee1 100644
--- a/pyogrio/tests/test_raw_io.py
+++ b/pyogrio/tests/test_raw_io.py
@@ -613,6 +613,20 @@ def test_read_datetime_millisecond(test_datetime):
     assert field[1] == np.datetime64("2020-01-01 10:00:00.000")
 
 
+def test_read_datetime_tz(test_datetime_tz):
+    field = read(test_datetime_tz)[3][0]
+    assert field.dtype == "datetime64[ms]"
+    assert field[0] == np.datetime64(
+        "2020-01-01 09:00:00.123"
+    )  # timezone is ignored TODO should this be fixed as well?
+    assert field[1] == np.datetime64("2020-01-01 10:00:00.000")
+    field = read(test_datetime_tz, datetime_as_string=True)[3][0]
+    assert field.dtype == "object"
+    # GDAL doesn't return strings in ISO format (yet)
+    assert field[0] == "2020/01/01 09:00:00.123-05"
+    assert field[1] == "2020/01/01 10:00:00-05"
+
+
 @pytest.mark.parametrize("ext", ["gpkg", "geojson"])
 def test_read_write_null_geometry(tmp_path, ext):
     # Point(0, 0), null

From 1a2af4d179f2fa813325850cc278504352ede059 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Mon, 8 May 2023 22:29:33 +1000
Subject: [PATCH 05/51] working read

---
 pyogrio/geopandas.py               |  6 ++---
 pyogrio/tests/test_geopandas_io.py | 40 ++++++++++++++++++++++++------
 2 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index c996cb3e..fa064679 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -146,10 +146,9 @@ def read_dataframe(
     path_or_buffer = _stringify_path(path_or_buffer)
 
     read_func = read_arrow if use_arrow else read
-    if not read_arrow and "datetime_as_string" not in kwargs:
+    if not use_arrow and "datetime_as_string" not in kwargs:
         kwargs["datetime_as_string"] = True
         # TODO work out what to do with the arrow api
-
     result = read_func(
         path_or_buffer,
         layer=layer,
@@ -189,8 +188,9 @@ def read_dataframe(
         index = pd.Index(index, name="fid")
     else:
         index = None
-
+    print(data)
     df = pd.DataFrame(data, columns=columns, index=index)
+    print(df)
     for dtype, c in zip(meta["dtypes"], df.columns):
         if dtype.startswith("datetime"):
             df[c] = pd.to_datetime(df[c], format="%Y/%m/%d %H:%M:%S%z")
diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 9a14d1cf..4d1ec7c9 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -18,7 +18,11 @@
 
 try:
     import pandas as pd
-    from pandas.testing import assert_frame_equal, assert_index_equal
+    from pandas.testing import (
+        assert_frame_equal,
+        assert_index_equal,
+        assert_series_equal,
+    )
 
     import geopandas as gp
     from geopandas.array import from_wkt
@@ -143,14 +147,36 @@ def test_read_datetime_tz(test_datetime_tz):
     df = read_dataframe(test_datetime_tz)
     if Version(pd.__version__) >= Version("2.0.0"):
         # starting with pandas 2.0, it preserves the passed datetime resolution
-        assert df.col.dtype.name == "datetime64[ms]"
+        assert df.col.dtype.name == "datetime64[ms, pytz.FixedOffset(-300)]"
+    else:
+        assert df.col.dtype.name == "datetime64[ns, pytz.FixedOffset(-300)]"
+
+    assert_series_equal(
+        df.col,
+        pd.Series(
+            pd.to_datetime(
+                ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"]
+            ),
+            name="col",
+        ),
+    )
+
+    df = read_dataframe(test_datetime_tz, use_arrow=True)
+    if Version(pd.__version__) >= Version("2.0.0"):
+        # starting with pandas 2.0, it preserves the passed datetime resolution
+        assert df.col.dtype.name == "datetime64[ms, pytz.FixedOffset(-300)]"
     else:
-        assert df.col.dtype.name == "datetime64[ns]"
+        assert df.col.dtype.name == "datetime64[ns, pytz.FixedOffset(-300)]"
 
-    df.col = df.col.astype("datetime64[ms]")
-    print(df.col)
-    print(df.col.dtype)
-    # TODO assertion
+    assert_series_equal(
+        df.col,
+        pd.Series(
+            pd.to_datetime(
+                ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"]
+            ),
+            name="col",
+        ),
+    )
 
 
 def test_read_null_values(test_fgdb_vsi):

From fbd289892737e18d8f763d546dbb7593224233d2 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Tue, 9 May 2023 21:20:20 +1000
Subject: [PATCH 06/51] commit my test file

---
 pyogrio/tests/fixtures/test_datetime_tz.geojson | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 pyogrio/tests/fixtures/test_datetime_tz.geojson

diff --git a/pyogrio/tests/fixtures/test_datetime_tz.geojson b/pyogrio/tests/fixtures/test_datetime_tz.geojson
new file mode 100644
index 00000000..2fb72759
--- /dev/null
+++ b/pyogrio/tests/fixtures/test_datetime_tz.geojson
@@ -0,0 +1,7 @@
+{
+"type": "FeatureCollection",
+"features": [
+{ "type": "Feature", "properties": { "col": "2020-01-01T09:00:00.123-05:00" }, "geometry": { "type": "Point", "coordinates": [ 1.0, 1.0 ] } },
+{ "type": "Feature", "properties": { "col": "2020-01-01T10:00:00-05:00" }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } }
+]
+}

From 127d0a7d5daadd8abd52e901b0213659a3c0c5c0 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Thu, 11 May 2023 09:08:05 +1000
Subject: [PATCH 07/51] actually fix tests with read working

---
 pyogrio/raw.py                     |  1 +
 pyogrio/tests/test_geopandas_io.py | 63 ++++++++++++++++++------------
 pyogrio/tests/test_raw_io.py       |  3 ++
 3 files changed, 41 insertions(+), 26 deletions(-)

diff --git a/pyogrio/raw.py b/pyogrio/raw.py
index 30b73931..38f429e3 100644
--- a/pyogrio/raw.py
+++ b/pyogrio/raw.py
@@ -393,6 +393,7 @@ def write(
     layer_options=None,
     **kwargs,
 ):
+    kwargs.pop("dtypes", None)
     if geometry_type is None:
         raise ValueError("geometry_type must be provided")
 
diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 4d1ec7c9..44d22a01 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -143,40 +143,51 @@ def test_read_datetime(test_fgdb_vsi):
         assert df.SURVEY_DAT.dtype.name == "datetime64[ns]"
 
 
-def test_read_datetime_tz(test_datetime_tz):
+def test_read_datetime_tz(test_datetime_tz, tmp_path):
     df = read_dataframe(test_datetime_tz)
     if Version(pd.__version__) >= Version("2.0.0"):
         # starting with pandas 2.0, it preserves the passed datetime resolution
         assert df.col.dtype.name == "datetime64[ms, pytz.FixedOffset(-300)]"
     else:
         assert df.col.dtype.name == "datetime64[ns, pytz.FixedOffset(-300)]"
-
-    assert_series_equal(
-        df.col,
-        pd.Series(
-            pd.to_datetime(
-                ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"]
-            ),
-            name="col",
-        ),
+    expected_dt_col = pd.Series(
+        pd.to_datetime(["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"]),
+        name="col",
     )
-
-    df = read_dataframe(test_datetime_tz, use_arrow=True)
-    if Version(pd.__version__) >= Version("2.0.0"):
-        # starting with pandas 2.0, it preserves the passed datetime resolution
-        assert df.col.dtype.name == "datetime64[ms, pytz.FixedOffset(-300)]"
-    else:
-        assert df.col.dtype.name == "datetime64[ns, pytz.FixedOffset(-300)]"
-
-    assert_series_equal(
-        df.col,
-        pd.Series(
-            pd.to_datetime(
-                ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"]
-            ),
-            name="col",
-        ),
+    assert_series_equal(df.col, expected_dt_col)
+    # test write and read round trips
+    fpath = tmp_path / "test.gpkg"
+    write_dataframe(df, fpath)
+    df_read = read_dataframe(fpath)
+    print("good\n", df.col)
+    import pytz
+
+    print(
+        "bad\n",
+        df_read.col,
+        "\n",
+        df_read.col.dt.tz_localize("UTC").dt.tz_convert(pytz.FixedOffset(-300)),
     )
+    assert_series_equal(df_read.col, expected_dt_col)
+
+    # assert_geodataframe_equal(df, df_read)
+
+    # df = read_dataframe(test_datetime_tz, use_arrow=True)
+    # if Version(pd.__version__) >= Version("2.0.0"):
+    #     # starting with pandas 2.0, it preserves the passed datetime resolution
+    #     assert df.col.dtype.name == "datetime64[ms, pytz.FixedOffset(-300)]"
+    # else:
+    #     assert df.col.dtype.name == "datetime64[ns, pytz.FixedOffset(-300)]"
+
+    # assert_series_equal(
+    #     df.col,
+    #     pd.Series(
+    #         pd.to_datetime(
+    #             ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"]
+    #         ),
+    #         name="col",
+    #     ),
+    # )
 
 
 def test_read_null_values(test_fgdb_vsi):
diff --git a/pyogrio/tests/test_raw_io.py b/pyogrio/tests/test_raw_io.py
index 5498bee1..2d57da90 100644
--- a/pyogrio/tests/test_raw_io.py
+++ b/pyogrio/tests/test_raw_io.py
@@ -260,6 +260,7 @@ def test_read_fids_unsupported_keywords(naturalearth_lowres):
 
 def test_write(tmpdir, naturalearth_lowres):
     meta, _, geometry, field_data = read(naturalearth_lowres)
+    meta.pop("dtypes")
 
     filename = os.path.join(str(tmpdir), "test.shp")
     write(filename, geometry, field_data, **meta)
@@ -463,6 +464,7 @@ def assert_equal_result(result1, result2):
 def test_read_from_bytes(tmpdir, naturalearth_lowres, driver, ext):
     meta, index, geometry, field_data = read(naturalearth_lowres)
     meta.update({"geometry_type": "Unknown"})
+    meta.pop("dtypes")
     filename = os.path.join(str(tmpdir), f"test.{ext}")
     write(filename, geometry, field_data, driver=driver, **meta)
 
@@ -489,6 +491,7 @@ def test_read_from_bytes_zipped(tmpdir, naturalearth_lowres_vsi):
 def test_read_from_file_like(tmpdir, naturalearth_lowres, driver, ext):
     meta, index, geometry, field_data = read(naturalearth_lowres)
     meta.update({"geometry_type": "Unknown"})
+    meta.pop("dtypes")
     filename = os.path.join(str(tmpdir), f"test.{ext}")
     write(filename, geometry, field_data, driver=driver, **meta)
 

From 016778ae8936da4966f2e228cecc4cee5d811e28 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sun, 21 May 2023 20:52:22 +1000
Subject: [PATCH 08/51] good enough wip progress for now

---
 pyogrio/_io.pyx                    | 13 ++++++---
 pyogrio/geopandas.py               | 17 ++++++++++--
 pyogrio/raw.py                     |  4 +++
 pyogrio/tests/test_geopandas_io.py | 42 ++++++------------------------
 4 files changed, 36 insertions(+), 40 deletions(-)

diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx
index fe311792..f74438d7 100644
--- a/pyogrio/_io.pyx
+++ b/pyogrio/_io.pyx
@@ -1488,7 +1488,7 @@ cdef infer_field_types(list dtypes):
 def ogr_write(
     str path, str layer, str driver, geometry, fields, field_data, field_mask,
     str crs, str geometry_type, str encoding, object dataset_kwargs,
-    object layer_kwargs, bint promote_to_multi=False, bint nan_as_null=True,
+    object layer_kwargs, tz_offsets, bint promote_to_multi=False, bint nan_as_null=True,
     bint append=False, dataset_metadata=None, layer_metadata=None
 ):
     cdef const char *path_c = NULL
@@ -1795,6 +1795,9 @@ def ogr_write(
                     if np.isnat(field_value):
                         OGR_F_SetFieldNull(ogr_feature, field_idx)
                     else:
+                        # TODO check if 0=unknown or 1=localtime is most appropriate
+                        tz = tz_offsets.get(fields[field_idx], 1)
+                        print(tz)
                         datetime = field_value.item()
                         OGR_F_SetFieldDateTimeEx(
                             ogr_feature,
@@ -1805,14 +1808,16 @@ def ogr_write(
                             0,
                             0,
                             0.0,
-                            0
+                            tz
                         )
 
                 elif field_type == OFTDateTime:
                     if np.isnat(field_value):
                         OGR_F_SetFieldNull(ogr_feature, field_idx)
                     else:
-                        # TODO: add support for timezones
+                        # TODO check if 0=unknown or 1=localtime is most appropriate
+                        tz = tz_offsets.get(fields[field_idx], 1)
+                        print(tz, type(tz))
                         datetime = field_value.astype("datetime64[ms]").item()
                         OGR_F_SetFieldDateTimeEx(
                             ogr_feature,
@@ -1823,7 +1828,7 @@ def ogr_write(
                             datetime.hour,
                             datetime.minute,
                             datetime.second + datetime.microsecond / 10**6,
-                            0
+                            tz
                         )
 
                 else:
diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index fa064679..db70189e 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -1,3 +1,4 @@
+import datetime
 import numpy as np
 from pyogrio.raw import DRIVERS_NO_MIXED_SINGLE_MULTI, DRIVERS_NO_MIXED_DIMENSIONS
 from pyogrio.raw import detect_driver, read, read_arrow, write
@@ -193,7 +194,7 @@ def read_dataframe(
     print(df)
     for dtype, c in zip(meta["dtypes"], df.columns):
         if dtype.startswith("datetime"):
-            df[c] = pd.to_datetime(df[c], format="%Y/%m/%d %H:%M:%S%z")
+            df[c] = pd.to_datetime(df[c], format="ISO8601")
 
     if geometry is None or not read_geometry:
         return df
@@ -336,8 +337,19 @@ def write_dataframe(
     # TODO: may need to fill in pd.NA, etc
     field_data = []
     field_mask = []
+    tz_offsets = {} # gdal format timezone offsets by column
     for name in fields:
-        col = df[name].values
+        ser = df[name]
+        col = ser.values
+        if isinstance(ser.dtype, pd.DatetimeTZDtype):
+            if len(ser)>0:
+                # Assume series has same offset for all rows (enforced by definition in pandas)
+                tz_offsets[name] = int(ser.iloc[0].utcoffset() / datetime.timedelta(minutes=15)) +100
+                # Convert to naive datetime, add offset back per above
+                col = ser.dt.tz_localize(None).values
+                
+            # edge case of length zero ignored, don't have a reference date
+
         if isinstance(col, pd.api.extensions.ExtensionArray):
             from pandas.arrays import IntegerArray, FloatingArray, BooleanArray
 
@@ -437,5 +449,6 @@ def write_dataframe(
         metadata=metadata,
         dataset_options=dataset_options,
         layer_options=layer_options,
+        tz_offsets=tz_offsets,
         **kwargs,
     )
diff --git a/pyogrio/raw.py b/pyogrio/raw.py
index 38f429e3..1228f672 100644
--- a/pyogrio/raw.py
+++ b/pyogrio/raw.py
@@ -391,6 +391,7 @@ def write(
     metadata=None,
     dataset_options=None,
     layer_options=None,
+    tz_offsets=None,
     **kwargs,
 ):
     kwargs.pop("dtypes", None)
@@ -440,6 +441,8 @@ def write(
             "projection information defined and may not be usable in other "
             "systems."
         )
+    if tz_offsets is None:
+        tz_offsets = {}
 
     # preprocess kwargs and split in dataset and layer creation options
     dataset_kwargs = _preprocess_options_key_value(dataset_options or {})
@@ -478,4 +481,5 @@ def write(
         layer_metadata=layer_metadata,
         dataset_kwargs=dataset_kwargs,
         layer_kwargs=layer_kwargs,
+        tz_offsets=tz_offsets,
     )
diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 44d22a01..4257e7f8 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -136,6 +136,7 @@ def test_read_layer_invalid(naturalearth_lowres_all_ext):
 @pytest.mark.filterwarnings("ignore: Measured")
 def test_read_datetime(test_fgdb_vsi):
     df = read_dataframe(test_fgdb_vsi, layer="test_lines", max_features=1)
+    print(df)
     if Version(pd.__version__) >= Version("2.0.0"):
         # starting with pandas 2.0, it preserves the passed datetime resolution
         assert df.SURVEY_DAT.dtype.name == "datetime64[ms]"
@@ -145,49 +146,22 @@ def test_read_datetime(test_fgdb_vsi):
 
 def test_read_datetime_tz(test_datetime_tz, tmp_path):
     df = read_dataframe(test_datetime_tz)
-    if Version(pd.__version__) >= Version("2.0.0"):
-        # starting with pandas 2.0, it preserves the passed datetime resolution
-        assert df.col.dtype.name == "datetime64[ms, pytz.FixedOffset(-300)]"
-    else:
-        assert df.col.dtype.name == "datetime64[ns, pytz.FixedOffset(-300)]"
+    
+    assert df.col.dtype.unit == "ns" # tz aware does not support ms resolution, even in pandas 2
     expected_dt_col = pd.Series(
-        pd.to_datetime(["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"]),
+        pd.to_datetime(["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"], format="ISO8601"),
         name="col",
     )
     assert_series_equal(df.col, expected_dt_col)
     # test write and read round trips
-    fpath = tmp_path / "test.gpkg"
+    fpath = tmp_path / "test.geojson" # TODO gpkg doesn't work here, at least for my local gdal, writes NaT
     write_dataframe(df, fpath)
     df_read = read_dataframe(fpath)
-    print("good\n", df.col)
-    import pytz
-
-    print(
-        "bad\n",
-        df_read.col,
-        "\n",
-        df_read.col.dt.tz_localize("UTC").dt.tz_convert(pytz.FixedOffset(-300)),
-    )
+    print("a", df_read.col)
+    print("b", expected_dt_col)
     assert_series_equal(df_read.col, expected_dt_col)
 
-    # assert_geodataframe_equal(df, df_read)
-
-    # df = read_dataframe(test_datetime_tz, use_arrow=True)
-    # if Version(pd.__version__) >= Version("2.0.0"):
-    #     # starting with pandas 2.0, it preserves the passed datetime resolution
-    #     assert df.col.dtype.name == "datetime64[ms, pytz.FixedOffset(-300)]"
-    # else:
-    #     assert df.col.dtype.name == "datetime64[ns, pytz.FixedOffset(-300)]"
-
-    # assert_series_equal(
-    #     df.col,
-    #     pd.Series(
-    #         pd.to_datetime(
-    #             ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"]
-    #         ),
-    #         name="col",
-    #     ),
-    # )
+
 
 
 def test_read_null_values(test_fgdb_vsi):

From faa06311a94bd5a91bbcc9f4c56d222fe466a192 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sun, 21 May 2023 21:11:18 +1000
Subject: [PATCH 09/51] make these failures easier to read

---
 .github/workflows/tests-conda.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/tests-conda.yml b/.github/workflows/tests-conda.yml
index e7fe62c0..bf867e7b 100644
--- a/.github/workflows/tests-conda.yml
+++ b/.github/workflows/tests-conda.yml
@@ -66,4 +66,4 @@ jobs:
 
       - name: Test
         run: |
-          pytest -v -r s pyogrio/tests
+          pytest -v --color=yes -r s pyogrio/tests

From a8c200e801d50356377ea5a994fd1bf4bc23159d Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sun, 21 May 2023 21:20:46 +1000
Subject: [PATCH 10/51] fix for non tz

---
 pyogrio/geopandas.py               | 15 ++++++++++++++-
 pyogrio/tests/test_geopandas_io.py | 13 +++++++------
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index db70189e..455e7619 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -1,5 +1,7 @@
 import datetime
+import warnings
 import numpy as np
+import pandas as pd
 from pyogrio.raw import DRIVERS_NO_MIXED_SINGLE_MULTI, DRIVERS_NO_MIXED_DIMENSIONS
 from pyogrio.raw import detect_driver, read, read_arrow, write
 from pyogrio.errors import DataSourceError
@@ -19,6 +21,17 @@ def _stringify_path(path):
     # pass-though other objects
     return path
 
+def _try_parse_datetime(ser):
+    # TODO surely a better way of doing this
+    for format in ["ISO8601", "%y/%m/%d %H:%M:%S", "%y/%m/%d %H:%M:%S.%f", "%y/%m/%d %H:%M:%S.%f%z"]:
+        try: 
+            return pd.to_datetime(ser, format=format)
+        except ValueError:
+            pass
+    warnings.warn(f"Datetime parsing failed for column {ser.name!r}")
+    return ser
+                
+
 
 def read_dataframe(
     path_or_buffer,
@@ -194,7 +207,7 @@ def read_dataframe(
     print(df)
     for dtype, c in zip(meta["dtypes"], df.columns):
         if dtype.startswith("datetime"):
-            df[c] = pd.to_datetime(df[c], format="ISO8601")
+            df[c] = _try_parse_datetime(df[c])
 
     if geometry is None or not read_geometry:
         return df
diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 4257e7f8..8509ddb5 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -136,12 +136,13 @@ def test_read_layer_invalid(naturalearth_lowres_all_ext):
 @pytest.mark.filterwarnings("ignore: Measured")
 def test_read_datetime(test_fgdb_vsi):
     df = read_dataframe(test_fgdb_vsi, layer="test_lines", max_features=1)
-    print(df)
-    if Version(pd.__version__) >= Version("2.0.0"):
-        # starting with pandas 2.0, it preserves the passed datetime resolution
-        assert df.SURVEY_DAT.dtype.name == "datetime64[ms]"
-    else:
-        assert df.SURVEY_DAT.dtype.name == "datetime64[ns]"
+    # if Version(pd.__version__) >= Version("2.0.0"):
+    #     # starting with pandas 2.0, it preserves the passed datetime resolution
+    #     assert df.SURVEY_DAT.dtype.name == "datetime64[ms]"
+    # else:
+    #     assert df.SURVEY_DAT.dtype.name == "datetime64[ns]"
+    # String reading breaks this dtype preservation
+    assert df.SURVEY_DAT.dtype.name == "datetime64[ns]"
 
 
 def test_read_datetime_tz(test_datetime_tz, tmp_path):

From 604737597d10b8ec3f64c829b3c7c088432162b5 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Mon, 22 May 2023 21:14:58 +1000
Subject: [PATCH 11/51] fix some tests

---
 pyogrio/_io.pyx      | 6 +++---
 pyogrio/geopandas.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx
index f74438d7..e49b0704 100644
--- a/pyogrio/_io.pyx
+++ b/pyogrio/_io.pyx
@@ -725,7 +725,7 @@ cdef get_features(
 
     field_data = [
         np.empty(shape=(num_features, ),
-        # TODO cython has walrus?
+
         dtype=("object" if datetime_as_string and fields[field_index,3].startswith("datetime") else fields[field_index,3])) for field_index in range(n_fields)
     ]
     types = [(fields[field_index,2],fields[field_index,3]) for field_index in range(n_fields)]
@@ -822,10 +822,10 @@ cdef get_features_by_fid(
     n_fields = fields.shape[0]
     field_indexes = fields[:,0]
     field_ogr_types = fields[:,1]
-    # TODO missing datetime dtype hack on this path?
     field_data = [
         np.empty(shape=(count, ),
-        dtype=fields[field_index,3]) for field_index in range(n_fields)
+
+        dtype=("object" if datetime_as_string and fields[field_index,3].startswith("datetime") else fields[field_index,3])) for field_index in range(n_fields)
     ]
 
     field_data_view = [field_data[field_index][:] for field_index in range(n_fields)]
diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index 455e7619..3cd23898 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -1,7 +1,6 @@
 import datetime
 import warnings
 import numpy as np
-import pandas as pd
 from pyogrio.raw import DRIVERS_NO_MIXED_SINGLE_MULTI, DRIVERS_NO_MIXED_DIMENSIONS
 from pyogrio.raw import detect_driver, read, read_arrow, write
 from pyogrio.errors import DataSourceError
@@ -22,6 +21,7 @@ def _stringify_path(path):
     return path
 
 def _try_parse_datetime(ser):
+    import pandas as pd
     # TODO surely a better way of doing this
     for format in ["ISO8601", "%y/%m/%d %H:%M:%S", "%y/%m/%d %H:%M:%S.%f", "%y/%m/%d %H:%M:%S.%f%z"]:
         try: 

From 606156330e48777dec6ba85d113a50ddbf1a70f3 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Mon, 22 May 2023 21:44:16 +1000
Subject: [PATCH 12/51] run pre commit

---
 pyogrio/geopandas.py               | 24 ++++++++++++++++--------
 pyogrio/tests/test_geopandas_io.py | 18 +++++++++++-------
 2 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index 3cd23898..45b18cc4 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -20,17 +20,23 @@ def _stringify_path(path):
     # pass-though other objects
     return path
 
+
 def _try_parse_datetime(ser):
     import pandas as pd
+
     # TODO surely a better way of doing this
-    for format in ["ISO8601", "%y/%m/%d %H:%M:%S", "%y/%m/%d %H:%M:%S.%f", "%y/%m/%d %H:%M:%S.%f%z"]:
-        try: 
+    for format in [
+        "ISO8601",
+        "%y/%m/%d %H:%M:%S",
+        "%y/%m/%d %H:%M:%S.%f",
+        "%y/%m/%d %H:%M:%S.%f%z",
+    ]:
+        try:
             return pd.to_datetime(ser, format=format)
         except ValueError:
             pass
     warnings.warn(f"Datetime parsing failed for column {ser.name!r}")
     return ser
-                
 
 
 def read_dataframe(
@@ -350,17 +356,19 @@ def write_dataframe(
     # TODO: may need to fill in pd.NA, etc
     field_data = []
     field_mask = []
-    tz_offsets = {} # gdal format timezone offsets by column
+    tz_offsets = {}  # gdal format timezone offsets by column
     for name in fields:
         ser = df[name]
         col = ser.values
         if isinstance(ser.dtype, pd.DatetimeTZDtype):
-            if len(ser)>0:
-                # Assume series has same offset for all rows (enforced by definition in pandas)
-                tz_offsets[name] = int(ser.iloc[0].utcoffset() / datetime.timedelta(minutes=15)) +100
+            if len(ser) > 0:
+                # Assume series has same offset for all rows (enforced by pandas)
+                tz_offsets[name] = (
+                    int(ser.iloc[0].utcoffset() / datetime.timedelta(minutes=15)) + 100
+                )
                 # Convert to naive datetime, add offset back per above
                 col = ser.dt.tz_localize(None).values
-                
+
             # edge case of length zero ignored, don't have a reference date
 
         if isinstance(col, pd.api.extensions.ExtensionArray):
diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 8509ddb5..b8d32d16 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -1,7 +1,6 @@
 import contextlib
 from datetime import datetime
 import os
-from packaging.version import Version
 
 import numpy as np
 import pytest
@@ -147,15 +146,22 @@ def test_read_datetime(test_fgdb_vsi):
 
 def test_read_datetime_tz(test_datetime_tz, tmp_path):
     df = read_dataframe(test_datetime_tz)
-    
-    assert df.col.dtype.unit == "ns" # tz aware does not support ms resolution, even in pandas 2
+
+    assert (
+        df.col.dtype.unit == "ns"
+    )  # tz aware does not support ms resolution, even in pandas 2
     expected_dt_col = pd.Series(
-        pd.to_datetime(["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"], format="ISO8601"),
+        pd.to_datetime(
+            ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"],
+            format="ISO8601",
+        ),
         name="col",
     )
     assert_series_equal(df.col, expected_dt_col)
     # test write and read round trips
-    fpath = tmp_path / "test.geojson" # TODO gpkg doesn't work here, at least for my local gdal, writes NaT
+    fpath = (
+        tmp_path / "test.geojson"
+    )  # TODO gpkg doesn't work here, at least for my local gdal, writes NaT
     write_dataframe(df, fpath)
     df_read = read_dataframe(fpath)
     print("a", df_read.col)
@@ -163,8 +169,6 @@ def test_read_datetime_tz(test_datetime_tz, tmp_path):
     assert_series_equal(df_read.col, expected_dt_col)
 
 
-
-
 def test_read_null_values(test_fgdb_vsi):
     df = read_dataframe(test_fgdb_vsi, read_geometry=False)
 

From 3ba42cf37063a5618cb1e58bfc624321ff949afd Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Tue, 23 May 2023 21:12:22 +1000
Subject: [PATCH 13/51] maybe old pandas, can't reproduce locally

---
 pyogrio/geopandas.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index 45b18cc4..276a7766 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -22,14 +22,24 @@ def _stringify_path(path):
 
 
 def _try_parse_datetime(ser):
-    import pandas as pd
+    import pandas as pd # only called in a block where pandas is known to be installed
+    try:
+        return pd.to_datetime(ser, format="ISO8601")
+    except ValueError:
+        pass
+    try:
+        return pd.to_datetime(ser, yearfirst=True)
+    except ValueError:
+        pass
 
     # TODO surely a better way of doing this
     for format in [
-        "ISO8601",
         "%y/%m/%d %H:%M:%S",
         "%y/%m/%d %H:%M:%S.%f",
         "%y/%m/%d %H:%M:%S.%f%z",
+        "%y/%m/%dT%H:%M:%S",
+        "%y/%m/%dT%H:%M:%S.%f",
+        "%y/%m/%dT%H:%M:%S.%f%z",
     ]:
         try:
             return pd.to_datetime(ser, format=format)

From d98314062ac68f865fdc5b6131adc0857b4005ef Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Tue, 23 May 2023 21:26:38 +1000
Subject: [PATCH 14/51] try and find something pandas 1.5 also happy with

---
 pyogrio/tests/test_geopandas_io.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index b8d32d16..037417fe 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -153,7 +153,9 @@ def test_read_datetime_tz(test_datetime_tz, tmp_path):
     expected_dt_col = pd.Series(
         pd.to_datetime(
             ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"],
-            format="ISO8601",
+            format='mixed',
+            yearfirst=True,
+            dayfirst=True
         ),
         name="col",
     )

From e9993bdbc4e6726507d57534872ef39cd67a5d55 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Tue, 23 May 2023 21:27:45 +1000
Subject: [PATCH 15/51] lint

---
 pyogrio/geopandas.py               | 3 ++-
 pyogrio/tests/test_geopandas_io.py | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index 276a7766..9182f67c 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -22,7 +22,8 @@ def _stringify_path(path):
 
 
 def _try_parse_datetime(ser):
-    import pandas as pd # only called in a block where pandas is known to be installed
+    import pandas as pd  # only called in a block where pandas is known to be installed
+
     try:
         return pd.to_datetime(ser, format="ISO8601")
     except ValueError:
diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 037417fe..293a819e 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -153,9 +153,9 @@ def test_read_datetime_tz(test_datetime_tz, tmp_path):
     expected_dt_col = pd.Series(
         pd.to_datetime(
             ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"],
-            format='mixed',
+            format="mixed",
             yearfirst=True,
-            dayfirst=True
+            dayfirst=True,
         ),
         name="col",
     )

From b6ca5cf216af1b04097163d4f22e49e90563bcc8 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Tue, 23 May 2023 21:39:06 +1000
Subject: [PATCH 16/51] simple answer

---
 pyogrio/tests/test_geopandas_io.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 293a819e..397a85c6 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -1,7 +1,7 @@
 import contextlib
 from datetime import datetime
 import os
-
+from packaging.version import Version
 import numpy as np
 import pytest
 
@@ -150,12 +150,14 @@ def test_read_datetime_tz(test_datetime_tz, tmp_path):
     assert (
         df.col.dtype.unit == "ns"
     )  # tz aware does not support ms resolution, even in pandas 2
+    if Version(pd.__version__) >= Version("2.0.0"):
+        format_ = "ISO8601"
+    else:
+        format_ = None
     expected_dt_col = pd.Series(
         pd.to_datetime(
             ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"],
-            format="mixed",
-            yearfirst=True,
-            dayfirst=True,
+            format=format_,
         ),
         name="col",
     )

From 05cc1cf2e36ff34c271b13d73a3c6b4098ee203c Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Thu, 25 May 2023 19:49:42 +1000
Subject: [PATCH 17/51] cleanup

---
 pyogrio/_io.pyx              | 9 +++------
 pyogrio/geopandas.py         | 8 +++-----
 pyogrio/tests/test_raw_io.py | 8 ++------
 3 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx
index e49b0704..c59787c5 100644
--- a/pyogrio/_io.pyx
+++ b/pyogrio/_io.pyx
@@ -725,11 +725,10 @@ cdef get_features(
 
     field_data = [
         np.empty(shape=(num_features, ),
-
-        dtype=("object" if datetime_as_string and fields[field_index,3].startswith("datetime") else fields[field_index,3])) for field_index in range(n_fields)
+        dtype = ("object" if datetime_as_string and 
+                    fields[field_index,3].startswith("datetime") else fields[field_index,3])
+        ) for field_index in range(n_fields)
     ]
-    types = [(fields[field_index,2],fields[field_index,3]) for field_index in range(n_fields)]
-    print("types are", types)
 
     field_data_view = [field_data[field_index][:] for field_index in range(n_fields)]
     i = 0
@@ -1797,7 +1796,6 @@ def ogr_write(
                     else:
                         # TODO check if 0=unknown or 1=localtime is most appropriate
                         tz = tz_offsets.get(fields[field_idx], 1)
-                        print(tz)
                         datetime = field_value.item()
                         OGR_F_SetFieldDateTimeEx(
                             ogr_feature,
@@ -1817,7 +1815,6 @@ def ogr_write(
                     else:
                         # TODO check if 0=unknown or 1=localtime is most appropriate
                         tz = tz_offsets.get(fields[field_idx], 1)
-                        print(tz, type(tz))
                         datetime = field_value.astype("datetime64[ms]").item()
                         OGR_F_SetFieldDateTimeEx(
                             ogr_feature,
diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index 9182f67c..96fc54da 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -210,8 +210,6 @@ def read_dataframe(
             return df
 
     meta, index, geometry, field_data = result
-    print("meta is")
-    print(meta)
 
     columns = meta["fields"].tolist()
     data = {columns[i]: field_data[i] for i in range(len(columns))}
@@ -219,9 +217,7 @@ def read_dataframe(
         index = pd.Index(index, name="fid")
     else:
         index = None
-    print(data)
     df = pd.DataFrame(data, columns=columns, index=index)
-    print(df)
     for dtype, c in zip(meta["dtypes"], df.columns):
         if dtype.startswith("datetime"):
             df[c] = _try_parse_datetime(df[c])
@@ -375,7 +371,9 @@ def write_dataframe(
             if len(ser) > 0:
                 # Assume series has same offset for all rows (enforced by pandas)
                 tz_offsets[name] = (
-                    int(ser.iloc[0].utcoffset() / datetime.timedelta(minutes=15)) + 100
+                    # gdal stores timezones in 15 minute increments offset from 100=GMT
+                    int(ser.iloc[0].utcoffset() / datetime.timedelta(minutes=15))
+                    + 100
                 )
                 # Convert to naive datetime, add offset back per above
                 col = ser.dt.tz_localize(None).values
diff --git a/pyogrio/tests/test_raw_io.py b/pyogrio/tests/test_raw_io.py
index 2d57da90..19d9d049 100644
--- a/pyogrio/tests/test_raw_io.py
+++ b/pyogrio/tests/test_raw_io.py
@@ -260,7 +260,6 @@ def test_read_fids_unsupported_keywords(naturalearth_lowres):
 
 def test_write(tmpdir, naturalearth_lowres):
     meta, _, geometry, field_data = read(naturalearth_lowres)
-    meta.pop("dtypes")
 
     filename = os.path.join(str(tmpdir), "test.shp")
     write(filename, geometry, field_data, **meta)
@@ -464,7 +463,6 @@ def assert_equal_result(result1, result2):
 def test_read_from_bytes(tmpdir, naturalearth_lowres, driver, ext):
     meta, index, geometry, field_data = read(naturalearth_lowres)
     meta.update({"geometry_type": "Unknown"})
-    meta.pop("dtypes")
     filename = os.path.join(str(tmpdir), f"test.{ext}")
     write(filename, geometry, field_data, driver=driver, **meta)
 
@@ -491,7 +489,6 @@ def test_read_from_bytes_zipped(tmpdir, naturalearth_lowres_vsi):
 def test_read_from_file_like(tmpdir, naturalearth_lowres, driver, ext):
     meta, index, geometry, field_data = read(naturalearth_lowres)
     meta.update({"geometry_type": "Unknown"})
-    meta.pop("dtypes")
     filename = os.path.join(str(tmpdir), f"test.{ext}")
     write(filename, geometry, field_data, driver=driver, **meta)
 
@@ -619,9 +616,8 @@ def test_read_datetime_millisecond(test_datetime):
 def test_read_datetime_tz(test_datetime_tz):
     field = read(test_datetime_tz)[3][0]
     assert field.dtype == "datetime64[ms]"
-    assert field[0] == np.datetime64(
-        "2020-01-01 09:00:00.123"
-    )  # timezone is ignored TODO should this be fixed as well?
+    # timezone is ignored in numpy layer
+    assert field[0] == np.datetime64("2020-01-01 09:00:00.123")
     assert field[1] == np.datetime64("2020-01-01 10:00:00.000")
     field = read(test_datetime_tz, datetime_as_string=True)[3][0]
     assert field.dtype == "object"

From a78a76cecbbfea20785c3247fd118463f30038b5 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sat, 3 Jun 2023 13:46:51 +1000
Subject: [PATCH 18/51] wip, use strings to make multi timezones round trip

---
 pyogrio/_io.pyx                    | 14 +++++++++-----
 pyogrio/geopandas.py               | 29 +++++++++++++++-------------
 pyogrio/raw.py                     |  8 ++++----
 pyogrio/tests/test_geopandas_io.py | 31 +++++++++++++++++++++++++-----
 4 files changed, 55 insertions(+), 27 deletions(-)

diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx
index c59787c5..a3e2783a 100644
--- a/pyogrio/_io.pyx
+++ b/pyogrio/_io.pyx
@@ -1487,7 +1487,8 @@ cdef infer_field_types(list dtypes):
 def ogr_write(
     str path, str layer, str driver, geometry, fields, field_data, field_mask,
     str crs, str geometry_type, str encoding, object dataset_kwargs,
-    object layer_kwargs, tz_offsets, bint promote_to_multi=False, bint nan_as_null=True,
+    object layer_kwargs, #tz_offsets, 
+    bint promote_to_multi=False, bint nan_as_null=True,
     bint append=False, dataset_metadata=None, layer_metadata=None
 ):
     cdef const char *path_c = NULL
@@ -1795,8 +1796,9 @@ def ogr_write(
                         OGR_F_SetFieldNull(ogr_feature, field_idx)
                     else:
                         # TODO check if 0=unknown or 1=localtime is most appropriate
-                        tz = tz_offsets.get(fields[field_idx], 1)
+                        # tz = tz_offsets.get(fields[field_idx], 1)
                         datetime = field_value.item()
+                        # print("writing date", tz, datetime)
                         OGR_F_SetFieldDateTimeEx(
                             ogr_feature,
                             field_idx,
@@ -1806,7 +1808,7 @@ def ogr_write(
                             0,
                             0,
                             0.0,
-                            tz
+                            0
                         )
 
                 elif field_type == OFTDateTime:
@@ -1814,8 +1816,10 @@ def ogr_write(
                         OGR_F_SetFieldNull(ogr_feature, field_idx)
                     else:
                         # TODO check if 0=unknown or 1=localtime is most appropriate
-                        tz = tz_offsets.get(fields[field_idx], 1)
+                        # tz = tz_offsets.get(fields[field_idx], 1)
                         datetime = field_value.astype("datetime64[ms]").item()
+                        # print(f"writing date=({tz}), dt=({datetime})")
+
                         OGR_F_SetFieldDateTimeEx(
                             ogr_feature,
                             field_idx,
@@ -1825,7 +1829,7 @@ def ogr_write(
                             datetime.hour,
                             datetime.minute,
                             datetime.second + datetime.microsecond / 10**6,
-                            tz
+                            0
                         )
 
                 else:
diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index 96fc54da..86eccba1 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -1,4 +1,3 @@
-import datetime
 import warnings
 import numpy as np
 from pyogrio.raw import DRIVERS_NO_MIXED_SINGLE_MULTI, DRIVERS_NO_MIXED_DIMENSIONS
@@ -24,6 +23,8 @@ def _stringify_path(path):
 def _try_parse_datetime(ser):
     import pandas as pd  # only called in a block where pandas is known to be installed
 
+    print("got series", ser, ser.to_dict())
+
     try:
         return pd.to_datetime(ser, format="ISO8601")
     except ValueError:
@@ -34,7 +35,7 @@ def _try_parse_datetime(ser):
         pass
 
     # TODO surely a better way of doing this
-    for format in [
+    for fmt in [
         "%y/%m/%d %H:%M:%S",
         "%y/%m/%d %H:%M:%S.%f",
         "%y/%m/%d %H:%M:%S.%f%z",
@@ -43,10 +44,11 @@ def _try_parse_datetime(ser):
         "%y/%m/%dT%H:%M:%S.%f%z",
     ]:
         try:
-            return pd.to_datetime(ser, format=format)
+            return pd.to_datetime(ser, format=fmt)
         except ValueError:
             pass
     warnings.warn(f"Datetime parsing failed for column {ser.name!r}")
+    print("---------------failed parsing")
     return ser
 
 
@@ -363,21 +365,22 @@ def write_dataframe(
     # TODO: may need to fill in pd.NA, etc
     field_data = []
     field_mask = []
-    tz_offsets = {}  # gdal format timezone offsets by column
+    # dtypes = df[fields].dtypes().to_dict()
+    # tz_offsets = {}  # gdal format timezone offsets by column
     for name in fields:
         ser = df[name]
         col = ser.values
         if isinstance(ser.dtype, pd.DatetimeTZDtype):
             if len(ser) > 0:
                 # Assume series has same offset for all rows (enforced by pandas)
-                tz_offsets[name] = (
-                    # gdal stores timezones in 15 minute increments offset from 100=GMT
-                    int(ser.iloc[0].utcoffset() / datetime.timedelta(minutes=15))
-                    + 100
-                )
-                # Convert to naive datetime, add offset back per above
-                col = ser.dt.tz_localize(None).values
-
+                # tz_offsets[name] = (
+                #     # gdal stores tz in 15 minute increments offset from 100=GMT
+                #     int(ser.iloc[0].utcoffset() / datetime.timedelta(minutes=15))
+                #     + 100
+                # )
+                # Convert to naive datetime, offset passed through via dtypes
+                # col = ser.dt.tz_localize(None).values
+                col = ser.astype(str)
             # edge case of length zero ignored, don't have a reference date
 
         if isinstance(col, pd.api.extensions.ExtensionArray):
@@ -479,6 +482,6 @@ def write_dataframe(
         metadata=metadata,
         dataset_options=dataset_options,
         layer_options=layer_options,
-        tz_offsets=tz_offsets,
+        # tz_offsets=tz_offsets,
         **kwargs,
     )
diff --git a/pyogrio/raw.py b/pyogrio/raw.py
index 1228f672..0a9c746b 100644
--- a/pyogrio/raw.py
+++ b/pyogrio/raw.py
@@ -391,7 +391,7 @@ def write(
     metadata=None,
     dataset_options=None,
     layer_options=None,
-    tz_offsets=None,
+    # tz_offsets=None,
     **kwargs,
 ):
     kwargs.pop("dtypes", None)
@@ -441,8 +441,8 @@ def write(
             "projection information defined and may not be usable in other "
             "systems."
         )
-    if tz_offsets is None:
-        tz_offsets = {}
+    # if tz_offsets is None:
+    #     tz_offsets = {}
 
     # preprocess kwargs and split in dataset and layer creation options
     dataset_kwargs = _preprocess_options_key_value(dataset_options or {})
@@ -481,5 +481,5 @@ def write(
         layer_metadata=layer_metadata,
         dataset_kwargs=dataset_kwargs,
         layer_kwargs=layer_kwargs,
-        tz_offsets=tz_offsets,
+        # tz_offsets=tz_offsets,
     )
diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 397a85c6..fae710bc 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -22,6 +22,7 @@
         assert_index_equal,
         assert_series_equal,
     )
+    import pytz
 
     import geopandas as gp
     from geopandas.array import from_wkt
@@ -163,16 +164,36 @@ def test_read_datetime_tz(test_datetime_tz, tmp_path):
     )
     assert_series_equal(df.col, expected_dt_col)
     # test write and read round trips
-    fpath = (
-        tmp_path / "test.geojson"
-    )  # TODO gpkg doesn't work here, at least for my local gdal, writes NaT
+    # TODO gpkg doesn't work here, at least for my local gdal, writes NaT
+    fpath = tmp_path / "test.geojson"
     write_dataframe(df, fpath)
     df_read = read_dataframe(fpath)
-    print("a", df_read.col)
-    print("b", expected_dt_col)
     assert_series_equal(df_read.col, expected_dt_col)
 
 
+def test_write_datetime_mixed_offset(tmp_path):
+    # Summer Time (GMT+11), standard time (GMT+10)
+    dates = ["2023-01-01 11:00:01.111", "2023-06-01 10:00:01.111"]
+    tz = pytz.timezone("Australia/Sydney")
+    ser_naive = pd.Series(pd.to_datetime(dates), name="dates")
+    ser_localised = ser_naive.dt.tz_localize(tz)
+    df = gp.GeoDataFrame(
+        {"dates": ser_localised, "geometry": [Point(1, 1), Point(1, 1)]}
+    )
+    write_dataframe(df, "foo.geojson")
+    df_no_tz = read_dataframe(
+        "foo.geojson", datetime_as_string=False
+    )  # TODO this shouldn't be called datetime as string in the pandas layer,
+    #     should it even be accessible?
+    # datetime_as_string=False ignores tz info, returns datetime objects
+    expected = ser_naive.astype("datetime64[ms]")
+    assert_series_equal(expected, df_no_tz["dates"])
+    # datetime_as_string=True keeps tz info, but pandas can't handle multiple offsets
+    # unless given a timezone to identify them with -> returned as strings
+    df_local = read_dataframe("foo.geojson", datetime_as_string=True)
+    assert_series_equal(ser_localised.astype("object"), df_local["dates"])
+
+
 def test_read_null_values(test_fgdb_vsi):
     df = read_dataframe(test_fgdb_vsi, read_geometry=False)
 

From b6816567f8edab6e63430151b2c586a7379622a8 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sat, 3 Jun 2023 13:49:16 +1000
Subject: [PATCH 19/51] use tmp path fixture

---
 pyogrio/tests/test_geopandas_io.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index fae710bc..2996da5e 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -180,9 +180,10 @@ def test_write_datetime_mixed_offset(tmp_path):
     df = gp.GeoDataFrame(
         {"dates": ser_localised, "geometry": [Point(1, 1), Point(1, 1)]}
     )
-    write_dataframe(df, "foo.geojson")
+    fpath = tmp_path / "test.geojson"
+    write_dataframe(df, fpath)
     df_no_tz = read_dataframe(
-        "foo.geojson", datetime_as_string=False
+        fpath, datetime_as_string=False
     )  # TODO this shouldn't be called datetime as string in the pandas layer,
     #     should it even be accessible?
     # datetime_as_string=False ignores tz info, returns datetime objects
@@ -190,7 +191,7 @@ def test_write_datetime_mixed_offset(tmp_path):
     assert_series_equal(expected, df_no_tz["dates"])
     # datetime_as_string=True keeps tz info, but pandas can't handle multiple offsets
     # unless given a timezone to identify them with -> returned as strings
-    df_local = read_dataframe("foo.geojson", datetime_as_string=True)
+    df_local = read_dataframe(fpath, datetime_as_string=True)
     assert_series_equal(ser_localised.astype("object"), df_local["dates"])
 
 

From 3426fdc8b66a9c067b921595a4911d41482f447e Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sat, 3 Jun 2023 15:36:23 +1000
Subject: [PATCH 20/51] cleanups

---
 pyogrio/_io.pyx           | 15 ++++-----------
 pyogrio/geopandas.py      | 21 ++-------------------
 pyogrio/raw.py            |  6 +-----
 pyogrio/tests/conftest.py |  1 -
 4 files changed, 7 insertions(+), 36 deletions(-)

diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx
index a3e2783a..7d6c578c 100644
--- a/pyogrio/_io.pyx
+++ b/pyogrio/_io.pyx
@@ -823,8 +823,9 @@ cdef get_features_by_fid(
     field_ogr_types = fields[:,1]
     field_data = [
         np.empty(shape=(count, ),
-
-        dtype=("object" if datetime_as_string and fields[field_index,3].startswith("datetime") else fields[field_index,3])) for field_index in range(n_fields)
+        dtype=("object" if datetime_as_string and fields[field_index,3].startswith("datetime") 
+            else fields[field_index,3])) 
+        for field_index in range(n_fields)
     ]
 
     field_data_view = [field_data[field_index][:] for field_index in range(n_fields)]
@@ -1487,8 +1488,7 @@ cdef infer_field_types(list dtypes):
 def ogr_write(
     str path, str layer, str driver, geometry, fields, field_data, field_mask,
     str crs, str geometry_type, str encoding, object dataset_kwargs,
-    object layer_kwargs, #tz_offsets, 
-    bint promote_to_multi=False, bint nan_as_null=True,
+    object layer_kwargs, bint promote_to_multi=False, bint nan_as_null=True,
     bint append=False, dataset_metadata=None, layer_metadata=None
 ):
     cdef const char *path_c = NULL
@@ -1795,10 +1795,7 @@ def ogr_write(
                     if np.isnat(field_value):
                         OGR_F_SetFieldNull(ogr_feature, field_idx)
                     else:
-                        # TODO check if 0=unknown or 1=localtime is most appropriate
-                        # tz = tz_offsets.get(fields[field_idx], 1)
                         datetime = field_value.item()
-                        # print("writing date", tz, datetime)
                         OGR_F_SetFieldDateTimeEx(
                             ogr_feature,
                             field_idx,
@@ -1815,11 +1812,7 @@ def ogr_write(
                     if np.isnat(field_value):
                         OGR_F_SetFieldNull(ogr_feature, field_idx)
                     else:
-                        # TODO check if 0=unknown or 1=localtime is most appropriate
-                        # tz = tz_offsets.get(fields[field_idx], 1)
                         datetime = field_value.astype("datetime64[ms]").item()
-                        # print(f"writing date=({tz}), dt=({datetime})")
-
                         OGR_F_SetFieldDateTimeEx(
                             ogr_feature,
                             field_idx,
diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index 86eccba1..7b8b3f5f 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -23,8 +23,6 @@ def _stringify_path(path):
 def _try_parse_datetime(ser):
     import pandas as pd  # only called in a block where pandas is known to be installed
 
-    print("got series", ser, ser.to_dict())
-
     try:
         return pd.to_datetime(ser, format="ISO8601")
     except ValueError:
@@ -48,7 +46,6 @@ def _try_parse_datetime(ser):
         except ValueError:
             pass
     warnings.warn(f"Datetime parsing failed for column {ser.name!r}")
-    print("---------------failed parsing")
     return ser
 
 
@@ -181,7 +178,6 @@ def read_dataframe(
     read_func = read_arrow if use_arrow else read
     if not use_arrow and "datetime_as_string" not in kwargs:
         kwargs["datetime_as_string"] = True
-        # TODO work out what to do with the arrow api
     result = read_func(
         path_or_buffer,
         layer=layer,
@@ -201,7 +197,6 @@ def read_dataframe(
     )
 
     if use_arrow:
-        # TODO what happens to datetimes with arrow?
         meta, table = result
         df = table.to_pandas()
         geometry_name = meta["geometry_name"] or "wkb_geometry"
@@ -365,23 +360,12 @@ def write_dataframe(
     # TODO: may need to fill in pd.NA, etc
     field_data = []
     field_mask = []
-    # dtypes = df[fields].dtypes().to_dict()
-    # tz_offsets = {}  # gdal format timezone offsets by column
     for name in fields:
         ser = df[name]
         col = ser.values
         if isinstance(ser.dtype, pd.DatetimeTZDtype):
-            if len(ser) > 0:
-                # Assume series has same offset for all rows (enforced by pandas)
-                # tz_offsets[name] = (
-                #     # gdal stores tz in 15 minute increments offset from 100=GMT
-                #     int(ser.iloc[0].utcoffset() / datetime.timedelta(minutes=15))
-                #     + 100
-                # )
-                # Convert to naive datetime, offset passed through via dtypes
-                # col = ser.dt.tz_localize(None).values
-                col = ser.astype(str)
-            # edge case of length zero ignored, don't have a reference date
+            # Deal with datetimes with timezones as strings
+            col = ser.astype(str)
 
         if isinstance(col, pd.api.extensions.ExtensionArray):
             from pandas.arrays import IntegerArray, FloatingArray, BooleanArray
@@ -482,6 +466,5 @@ def write_dataframe(
         metadata=metadata,
         dataset_options=dataset_options,
         layer_options=layer_options,
-        # tz_offsets=tz_offsets,
         **kwargs,
     )
diff --git a/pyogrio/raw.py b/pyogrio/raw.py
index 0a9c746b..1d9eade8 100644
--- a/pyogrio/raw.py
+++ b/pyogrio/raw.py
@@ -111,7 +111,7 @@ def read(
         If True, will return the FIDs of the feature that were read.
     datetime_as_string : bool, optional (default: False)
         If True, will return datetime dtypes as detected by GDAL as a string
-        array, instead of a datetime64 array.
+        array, instead of a datetime64 array (used to extract timezone info).
 
     **kwargs
         Additional driver-specific dataset open options passed to OGR.  Invalid
@@ -391,7 +391,6 @@ def write(
     metadata=None,
     dataset_options=None,
     layer_options=None,
-    # tz_offsets=None,
     **kwargs,
 ):
     kwargs.pop("dtypes", None)
@@ -441,8 +440,6 @@ def write(
             "projection information defined and may not be usable in other "
             "systems."
         )
-    # if tz_offsets is None:
-    #     tz_offsets = {}
 
     # preprocess kwargs and split in dataset and layer creation options
     dataset_kwargs = _preprocess_options_key_value(dataset_options or {})
@@ -481,5 +478,4 @@ def write(
         layer_metadata=layer_metadata,
         dataset_kwargs=dataset_kwargs,
         layer_kwargs=layer_kwargs,
-        # tz_offsets=tz_offsets,
     )
diff --git a/pyogrio/tests/conftest.py b/pyogrio/tests/conftest.py
index ef47fc96..8583e6d7 100644
--- a/pyogrio/tests/conftest.py
+++ b/pyogrio/tests/conftest.py
@@ -43,7 +43,6 @@ def prepare_testfile(testfile_path, dst_dir, ext):
         # For .gpkg, spatial_index=False to avoid the rows being reordered
         meta["spatial_index"] = False
         meta["geometry_type"] = "MultiPolygon"
-    meta.pop("dtypes")
 
     write(dst_path, geometry, field_data, **meta)
     return dst_path

From bb6fd4ecb27de81a860d54b4ddc7ccb94991f4a7 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sat, 3 Jun 2023 15:43:02 +1000
Subject: [PATCH 21/51] try cleanup datetime parsing

---
 pyogrio/geopandas.py | 26 +++-----------------------
 1 file changed, 3 insertions(+), 23 deletions(-)

diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index 7b8b3f5f..e453d914 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -1,8 +1,8 @@
-import warnings
 import numpy as np
 from pyogrio.raw import DRIVERS_NO_MIXED_SINGLE_MULTI, DRIVERS_NO_MIXED_DIMENSIONS
 from pyogrio.raw import detect_driver, read, read_arrow, write
 from pyogrio.errors import DataSourceError
+from packaging.version import Version
 
 
 def _stringify_path(path):
@@ -23,30 +23,10 @@ def _stringify_path(path):
 def _try_parse_datetime(ser):
     import pandas as pd  # only called in a block where pandas is known to be installed
 
-    try:
+    if Version(pd.__version__) >= Version("2.0.0"):
         return pd.to_datetime(ser, format="ISO8601")
-    except ValueError:
-        pass
-    try:
+    else:
         return pd.to_datetime(ser, yearfirst=True)
-    except ValueError:
-        pass
-
-    # TODO surely a better way of doing this
-    for fmt in [
-        "%y/%m/%d %H:%M:%S",
-        "%y/%m/%d %H:%M:%S.%f",
-        "%y/%m/%d %H:%M:%S.%f%z",
-        "%y/%m/%dT%H:%M:%S",
-        "%y/%m/%dT%H:%M:%S.%f",
-        "%y/%m/%dT%H:%M:%S.%f%z",
-    ]:
-        try:
-            return pd.to_datetime(ser, format=fmt)
-        except ValueError:
-            pass
-    warnings.warn(f"Datetime parsing failed for column {ser.name!r}")
-    return ser
 
 
 def read_dataframe(

From 87419ac4ca01f4be24350d08709921b90ae0a116 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sat, 3 Jun 2023 16:04:59 +1000
Subject: [PATCH 22/51] more cleanup, realise we can get dt resolution

---
 pyogrio/_io.pyx                    |  6 +++---
 pyogrio/geopandas.py               |  5 ++++-
 pyogrio/tests/test_geopandas_io.py | 18 +++++++-----------
 3 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx
index 7d6c578c..91a518ef 100644
--- a/pyogrio/_io.pyx
+++ b/pyogrio/_io.pyx
@@ -658,10 +658,10 @@ cdef process_fields(
             data[i] = bin_value[:ret_length]
 
         elif field_type == OFTDateTime or field_type == OFTDate:
-            # defer datetime parsing to pandas layer 
+            
             if datetime_as_string:
-                value = get_string(OGR_F_GetFieldAsString(ogr_feature, field_index), encoding=encoding)
-                data[i] = value
+                # defer datetime parsing to user/ pandas layer 
+                data[i] = get_string(OGR_F_GetFieldAsString(ogr_feature, field_index), encoding=encoding)
             else:
                 success = OGR_F_GetFieldAsDateTimeEx(
                     ogr_feature, field_index, &year, &month, &day, &hour, &minute, &fsecond, &timezone)
diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index e453d914..38cb0ba9 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -24,7 +24,10 @@ def _try_parse_datetime(ser):
     import pandas as pd  # only called in a block where pandas is known to be installed
 
     if Version(pd.__version__) >= Version("2.0.0"):
-        return pd.to_datetime(ser, format="ISO8601")
+        res = pd.to_datetime(ser, format="ISO8601")
+        if res.dtype != "object":
+            res = res.dt.as_unit("ms")
+        return res
     else:
         return pd.to_datetime(ser, yearfirst=True)
 
diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 2996da5e..3213bad1 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -136,21 +136,17 @@ def test_read_layer_invalid(naturalearth_lowres_all_ext):
 @pytest.mark.filterwarnings("ignore: Measured")
 def test_read_datetime(test_fgdb_vsi):
     df = read_dataframe(test_fgdb_vsi, layer="test_lines", max_features=1)
-    # if Version(pd.__version__) >= Version("2.0.0"):
-    #     # starting with pandas 2.0, it preserves the passed datetime resolution
-    #     assert df.SURVEY_DAT.dtype.name == "datetime64[ms]"
-    # else:
-    #     assert df.SURVEY_DAT.dtype.name == "datetime64[ns]"
-    # String reading breaks this dtype preservation
-    assert df.SURVEY_DAT.dtype.name == "datetime64[ns]"
+    if Version(pd.__version__) >= Version("2.0.0"):
+        # starting with pandas 2.0, it preserves the passed datetime resolution
+        assert df.SURVEY_DAT.dtype.name == "datetime64[ms]"
+    else:
+        assert df.SURVEY_DAT.dtype.name == "datetime64[ns]"
 
 
 def test_read_datetime_tz(test_datetime_tz, tmp_path):
     df = read_dataframe(test_datetime_tz)
 
-    assert (
-        df.col.dtype.unit == "ns"
-    )  # tz aware does not support ms resolution, even in pandas 2
+    assert df.col.dtype.unit == "ms"
     if Version(pd.__version__) >= Version("2.0.0"):
         format_ = "ISO8601"
     else:
@@ -161,7 +157,7 @@ def test_read_datetime_tz(test_datetime_tz, tmp_path):
             format=format_,
         ),
         name="col",
-    )
+    ).dt.as_unit("ms")
     assert_series_equal(df.col, expected_dt_col)
     # test write and read round trips
     # TODO gpkg doesn't work here, at least for my local gdal, writes NaT

From fc78bd93df6d72e8b63d10d64e7e73e2a7d03a31 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sat, 3 Jun 2023 16:21:07 +1000
Subject: [PATCH 23/51] more careful pandas 1.5 compat

---
 pyogrio/tests/test_geopandas_io.py | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 3213bad1..1b9f467e 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -147,24 +147,22 @@ def test_read_datetime_tz(test_datetime_tz, tmp_path):
     df = read_dataframe(test_datetime_tz)
 
     assert df.col.dtype.unit == "ms"
+    raw_expected = ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"]
+
     if Version(pd.__version__) >= Version("2.0.0"):
-        format_ = "ISO8601"
+
+        expected = pd.to_datetime(raw_expected, format="ISO8601").as_unit("ms")
     else:
-        format_ = None
-    expected_dt_col = pd.Series(
-        pd.to_datetime(
-            ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"],
-            format=format_,
-        ),
-        name="col",
-    ).dt.as_unit("ms")
-    assert_series_equal(df.col, expected_dt_col)
+        expected = pd.to_datetime(raw_expected)
+    expected = pd.Series(expected, name="col")
+
+    assert_series_equal(df.col, expected)
     # test write and read round trips
     # TODO gpkg doesn't work here, at least for my local gdal, writes NaT
     fpath = tmp_path / "test.geojson"
     write_dataframe(df, fpath)
     df_read = read_dataframe(fpath)
-    assert_series_equal(df_read.col, expected_dt_col)
+    assert_series_equal(df_read.col, expected)
 
 
 def test_write_datetime_mixed_offset(tmp_path):

From 5fab3480c531fa771aae62843787f28f9fc2cb27 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sat, 3 Jun 2023 16:25:21 +1000
Subject: [PATCH 24/51] delete line

---
 pyogrio/tests/test_geopandas_io.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 1b9f467e..936a583e 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -145,12 +145,9 @@ def test_read_datetime(test_fgdb_vsi):
 
 def test_read_datetime_tz(test_datetime_tz, tmp_path):
     df = read_dataframe(test_datetime_tz)
-
-    assert df.col.dtype.unit == "ms"
     raw_expected = ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"]
 
     if Version(pd.__version__) >= Version("2.0.0"):
-
         expected = pd.to_datetime(raw_expected, format="ISO8601").as_unit("ms")
     else:
         expected = pd.to_datetime(raw_expected)

From 26c403adfb6cca70421a34e0d9e488407b4decef Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Tue, 8 Aug 2023 20:20:15 +1000
Subject: [PATCH 25/51] replace write support with working datetime object
 solution

---
 pyogrio/_io.pyx                    | 22 ++++++++++++++++++++--
 pyogrio/geopandas.py               | 17 ++++++++++++++---
 pyogrio/raw.py                     |  2 ++
 pyogrio/tests/test_geopandas_io.py | 16 ++++------------
 4 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx
index 91a518ef..39b5a7a1 100644
--- a/pyogrio/_io.pyx
+++ b/pyogrio/_io.pyx
@@ -1484,12 +1484,22 @@ cdef infer_field_types(list dtypes):
     return field_types
 
 
+FIFTEEN_MINUTE_DELTA = datetime.timedelta(minutes=15)
+
+cdef int timezone_to_gdal_offset(tz_as_datetime):
+    """Convert to GDAL timezone offset representation.
+    
+    https://gdal.org/development/rfc/rfc56_millisecond_precision.html#core-changes
+    """
+    return tz_as_datetime.utcoffset() / FIFTEEN_MINUTE_DELTA + 100
+
 # TODO: set geometry and field data as memory views?
 def ogr_write(
     str path, str layer, str driver, geometry, fields, field_data, field_mask,
     str crs, str geometry_type, str encoding, object dataset_kwargs,
     object layer_kwargs, bint promote_to_multi=False, bint nan_as_null=True,
-    bint append=False, dataset_metadata=None, layer_metadata=None
+    bint append=False, dataset_metadata=None, layer_metadata=None,
+    timezone_cols_metadata=None
 ):
     cdef const char *path_c = NULL
     cdef const char *layer_c = NULL
@@ -1542,6 +1552,9 @@ def ogr_write(
     if not layer:
         layer = os.path.splitext(os.path.split(path)[1])[0]
 
+    if timezone_cols_metadata is None:
+        timezone_cols_metadata = {}
+
 
     # if shapefile, GeoJSON, or FlatGeobuf, always delete first
     # for other types, check if we can create layers
@@ -1813,6 +1826,11 @@ def ogr_write(
                         OGR_F_SetFieldNull(ogr_feature, field_idx)
                     else:
                         datetime = field_value.astype("datetime64[ms]").item()
+                        tz_array = timezone_cols_metadata.get(fields[field_idx], None)
+                        if tz_array is None:
+                            gdal_tz = 0
+                        else:
+                            gdal_tz = timezone_to_gdal_offset(tz_array[i])
                         OGR_F_SetFieldDateTimeEx(
                             ogr_feature,
                             field_idx,
@@ -1822,7 +1840,7 @@ def ogr_write(
                             datetime.hour,
                             datetime.minute,
                             datetime.second + datetime.microsecond / 10**6,
-                            0
+                            gdal_tz
                         )
 
                 else:
diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index 38cb0ba9..fa4b9eb0 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -343,13 +343,24 @@ def write_dataframe(
     # TODO: may need to fill in pd.NA, etc
     field_data = []
     field_mask = []
+    timezone_cols_metadata = (
+        {}
+    )  # dict[str,np.array(datetime.datetime)] special case for dt-tz fields
     for name in fields:
         ser = df[name]
         col = ser.values
         if isinstance(ser.dtype, pd.DatetimeTZDtype):
-            # Deal with datetimes with timezones as strings
-            col = ser.astype(str)
-
+            # Deal with datetimes with timezones by passing down timezone separately
+            # pass down naive datetime
+            col = ser.dt.tz_localize(None).values
+            # pandas only supports a single offset per column
+            # access via array since we want a numpy array not a series
+            # (only care about the utc offset, not actually the date)
+            # but ser.array.timetz won't have valid utc offset for pytz time zones
+            # (per https://docs.python.org/3/library/datetime.html#datetime.time.utcoffset) # noqa
+            timezone_cols_metadata[name] = ser.array.to_pydatetime()
+        else:
+            col = ser.values
         if isinstance(col, pd.api.extensions.ExtensionArray):
             from pandas.arrays import IntegerArray, FloatingArray, BooleanArray
 
diff --git a/pyogrio/raw.py b/pyogrio/raw.py
index 1d9eade8..9128243e 100644
--- a/pyogrio/raw.py
+++ b/pyogrio/raw.py
@@ -391,6 +391,7 @@ def write(
     metadata=None,
     dataset_options=None,
     layer_options=None,
+    timezone_cols_metadata=None,
     **kwargs,
 ):
     kwargs.pop("dtypes", None)
@@ -478,4 +479,5 @@ def write(
         layer_metadata=layer_metadata,
         dataset_kwargs=dataset_kwargs,
         layer_kwargs=layer_kwargs,
+        timezone_cols_metadata=timezone_cols_metadata,
     )
diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 936a583e..46b61abd 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -155,8 +155,7 @@ def test_read_datetime_tz(test_datetime_tz, tmp_path):
 
     assert_series_equal(df.col, expected)
     # test write and read round trips
-    # TODO gpkg doesn't work here, at least for my local gdal, writes NaT
-    fpath = tmp_path / "test.geojson"
+    fpath = tmp_path / "test.gpkg"
     write_dataframe(df, fpath)
     df_read = read_dataframe(fpath)
     assert_series_equal(df_read.col, expected)
@@ -171,18 +170,11 @@ def test_write_datetime_mixed_offset(tmp_path):
     df = gp.GeoDataFrame(
         {"dates": ser_localised, "geometry": [Point(1, 1), Point(1, 1)]}
     )
-    fpath = tmp_path / "test.geojson"
+    fpath = tmp_path / "test.gpkg"
     write_dataframe(df, fpath)
-    df_no_tz = read_dataframe(
-        fpath, datetime_as_string=False
-    )  # TODO this shouldn't be called datetime as string in the pandas layer,
-    #     should it even be accessible?
-    # datetime_as_string=False ignores tz info, returns datetime objects
-    expected = ser_naive.astype("datetime64[ms]")
-    assert_series_equal(expected, df_no_tz["dates"])
-    # datetime_as_string=True keeps tz info, but pandas can't handle multiple offsets
-    # unless given a timezone to identify them with -> returned as strings
     df_local = read_dataframe(fpath, datetime_as_string=True)
+    # GDAL tz only encodes offsets, not timezones so expect object dtype
+    # as pandas only supports datetimes for fixed offset / identified timezones
     assert_series_equal(ser_localised.astype("object"), df_local["dates"])
 
 

From ebdb71b39fc0db7b5f3987847526be217b03dad9 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Tue, 8 Aug 2023 22:17:42 +1000
Subject: [PATCH 26/51] fixes

---
 pyogrio/geopandas.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index fa4b9eb0..d6224534 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -343,9 +343,8 @@ def write_dataframe(
     # TODO: may need to fill in pd.NA, etc
     field_data = []
     field_mask = []
-    timezone_cols_metadata = (
-        {}
-    )  # dict[str,np.array(datetime.datetime)] special case for dt-tz fields
+    # dict[str, np.array(datetime.datetime)] special case for dt-tz fields
+    timezone_cols_metadata = {}
     for name in fields:
         ser = df[name]
         col = ser.values
@@ -460,5 +459,6 @@ def write_dataframe(
         metadata=metadata,
         dataset_options=dataset_options,
         layer_options=layer_options,
+        timezone_cols_metadata=timezone_cols_metadata,
         **kwargs,
     )

From f46e716cdee60f050de71c652e8b06a243a748c8 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Tue, 8 Aug 2023 22:22:16 +1000
Subject: [PATCH 27/51] rewrite datetime reading to handle mixed offset to utc

---
 pyogrio/geopandas.py               | 22 ++++++++++++++++------
 pyogrio/tests/test_geopandas_io.py | 10 +++++++---
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index d6224534..a4dd9550 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -23,13 +23,23 @@ def _stringify_path(path):
 def _try_parse_datetime(ser):
     import pandas as pd  # only called in a block where pandas is known to be installed
 
-    if Version(pd.__version__) >= Version("2.0.0"):
-        res = pd.to_datetime(ser, format="ISO8601")
-        if res.dtype != "object":
-            res = res.dt.as_unit("ms")
-        return res
+    pandas_ge_20 = Version(pd.__version__) >= Version("2.0.0")
+
+    if pandas_ge_20:
+        datetime_kwargs = dict(format="ISO8601", errors="ignore")
     else:
-        return pd.to_datetime(ser, yearfirst=True)
+        datetime_kwargs = dict(yearfirst=True)
+    res = pd.to_datetime(ser, **datetime_kwargs)
+    # if object dtype, try parse as utc instead
+    if res.dtype == "object":
+        res = pd.to_datetime(ser, utc=True, **datetime_kwargs)
+
+    if res.dtype != "object":
+        if pandas_ge_20:
+            res = res.dt.as_unit("ms")
+        else:
+            res = ser.dt.round(freq="ms")
+    return res
 
 
 def read_dataframe(
diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 46b61abd..1f8f7e6f 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -167,15 +167,19 @@ def test_write_datetime_mixed_offset(tmp_path):
     tz = pytz.timezone("Australia/Sydney")
     ser_naive = pd.Series(pd.to_datetime(dates), name="dates")
     ser_localised = ser_naive.dt.tz_localize(tz)
+    ser_utc = ser_localised.dt.tz_convert(pytz.UTC)
+    if Version(pd.__version__) >= Version("2.0.0"):
+        ser_utc = ser_utc.dt.as_unit("ms")
+
     df = gp.GeoDataFrame(
         {"dates": ser_localised, "geometry": [Point(1, 1), Point(1, 1)]}
     )
     fpath = tmp_path / "test.gpkg"
     write_dataframe(df, fpath)
     df_local = read_dataframe(fpath, datetime_as_string=True)
-    # GDAL tz only encodes offsets, not timezones so expect object dtype
-    # as pandas only supports datetimes for fixed offset / identified timezones
-    assert_series_equal(ser_localised.astype("object"), df_local["dates"])
+    # GDAL tz only encodes offsets, not timezones, for multiple offsets
+    # read as utc datetime as otherwise would be read as string
+    assert_series_equal(ser_utc, df_local["dates"])
 
 
 def test_read_null_values(test_fgdb_vsi):

From 44686f94dbfd752adb9997d7c0791d0405f9b22c Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Tue, 8 Aug 2023 22:50:14 +1000
Subject: [PATCH 28/51] fix nat handling for datetime as string

---
 pyogrio/_io.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx
index 39b5a7a1..da86806b 100644
--- a/pyogrio/_io.pyx
+++ b/pyogrio/_io.pyx
@@ -632,7 +632,7 @@ cdef process_fields(
                 else:
                     data[i] = np.nan
 
-            elif field_type in ( OFTDate, OFTDateTime):
+            elif field_type in ( OFTDate, OFTDateTime) and not datetime_as_string:
                 data[i] = np.datetime64('NaT')
 
             else:

From 6b946f5bc38d3f495c33e4cdcf14f7258d9bace1 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Tue, 8 Aug 2023 22:52:50 +1000
Subject: [PATCH 29/51] don't expose datetime_as_string in pandas layer

---
 pyogrio/geopandas.py               | 2 +-
 pyogrio/tests/test_geopandas_io.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index a4dd9550..d6d234fe 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -169,7 +169,7 @@ def read_dataframe(
     path_or_buffer = _stringify_path(path_or_buffer)
 
     read_func = read_arrow if use_arrow else read
-    if not use_arrow and "datetime_as_string" not in kwargs:
+    if not use_arrow:
         kwargs["datetime_as_string"] = True
     result = read_func(
         path_or_buffer,
diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 1f8f7e6f..3a702e14 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -176,7 +176,7 @@ def test_write_datetime_mixed_offset(tmp_path):
     )
     fpath = tmp_path / "test.gpkg"
     write_dataframe(df, fpath)
-    df_local = read_dataframe(fpath, datetime_as_string=True)
+    df_local = read_dataframe(fpath)
     # GDAL tz only encodes offsets, not timezones, for multiple offsets
     # read as utc datetime as otherwise would be read as string
     assert_series_equal(ser_utc, df_local["dates"])

From ec16ed3cad0a21e9c71a37d9b16fb90014e0a178 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Tue, 8 Aug 2023 23:31:54 +1000
Subject: [PATCH 30/51] incorrect variable in 1.5.3 compat

---
 pyogrio/geopandas.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index d6d234fe..2e4133c7 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -38,7 +38,7 @@ def _try_parse_datetime(ser):
         if pandas_ge_20:
             res = res.dt.as_unit("ms")
         else:
-            res = ser.dt.round(freq="ms")
+            res = res.dt.round(freq="ms")
     return res
 
 

From da0639a495175865b53e6ba524cb1094cb11cc3e Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Wed, 9 Aug 2023 20:14:07 +1000
Subject: [PATCH 31/51] CLN: tidy up pandas 2.0 compat

---
 pyogrio/geopandas.py               | 17 ++++++++++++-----
 pyogrio/tests/test_geopandas_io.py |  9 ++++-----
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index 2e4133c7..65ae8437 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -5,6 +5,15 @@
 from packaging.version import Version
 
 
+try:
+    import pandas
+
+    PANDAS_GE_20 = Version(pandas.__version__) >= Version("2.0.0")
+
+except ImportError:
+    PANDAS_GE_20 = None
+
+
 def _stringify_path(path):
     """
     Convert path-like to a string if possible, pass-through other objects
@@ -21,11 +30,9 @@ def _stringify_path(path):
 
 
 def _try_parse_datetime(ser):
-    import pandas as pd  # only called in a block where pandas is known to be installed
-
-    pandas_ge_20 = Version(pd.__version__) >= Version("2.0.0")
+    import pandas as pd  # only called when pandas is known to be installed
 
-    if pandas_ge_20:
+    if PANDAS_GE_20:
         datetime_kwargs = dict(format="ISO8601", errors="ignore")
     else:
         datetime_kwargs = dict(yearfirst=True)
@@ -35,7 +42,7 @@ def _try_parse_datetime(ser):
         res = pd.to_datetime(ser, utc=True, **datetime_kwargs)
 
     if res.dtype != "object":
-        if pandas_ge_20:
+        if PANDAS_GE_20:
             res = res.dt.as_unit("ms")
         else:
             res = res.dt.round(freq="ms")
diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 3a702e14..b25905ea 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -1,13 +1,12 @@
 import contextlib
 from datetime import datetime
 import os
-from packaging.version import Version
 import numpy as np
 import pytest
 
 from pyogrio import list_layers, read_info, __gdal_version__, __gdal_geos_version__
 from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError
-from pyogrio.geopandas import read_dataframe, write_dataframe
+from pyogrio.geopandas import read_dataframe, write_dataframe, PANDAS_GE_20
 from pyogrio.raw import (
     DRIVERS,
     DRIVERS_NO_MIXED_DIMENSIONS,
@@ -136,7 +135,7 @@ def test_read_layer_invalid(naturalearth_lowres_all_ext):
 @pytest.mark.filterwarnings("ignore: Measured")
 def test_read_datetime(test_fgdb_vsi):
     df = read_dataframe(test_fgdb_vsi, layer="test_lines", max_features=1)
-    if Version(pd.__version__) >= Version("2.0.0"):
+    if PANDAS_GE_20:
         # starting with pandas 2.0, it preserves the passed datetime resolution
         assert df.SURVEY_DAT.dtype.name == "datetime64[ms]"
     else:
@@ -147,7 +146,7 @@ def test_read_datetime_tz(test_datetime_tz, tmp_path):
     df = read_dataframe(test_datetime_tz)
     raw_expected = ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"]
 
-    if Version(pd.__version__) >= Version("2.0.0"):
+    if PANDAS_GE_20:
         expected = pd.to_datetime(raw_expected, format="ISO8601").as_unit("ms")
     else:
         expected = pd.to_datetime(raw_expected)
@@ -168,7 +167,7 @@ def test_write_datetime_mixed_offset(tmp_path):
     ser_naive = pd.Series(pd.to_datetime(dates), name="dates")
     ser_localised = ser_naive.dt.tz_localize(tz)
     ser_utc = ser_localised.dt.tz_convert(pytz.UTC)
-    if Version(pd.__version__) >= Version("2.0.0"):
+    if PANDAS_GE_20:
         ser_utc = ser_utc.dt.as_unit("ms")
 
     df = gp.GeoDataFrame(

From 85a67c240a38dfb0f453cc87f852ee11f94fa8db Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sun, 24 Sep 2023 22:40:59 +1000
Subject: [PATCH 32/51] suggested alternative implementation

---
 pyogrio/_io.pyx      | 11 +----------
 pyogrio/geopandas.py | 15 ++++++++-------
 2 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx
index da86806b..341767fd 100644
--- a/pyogrio/_io.pyx
+++ b/pyogrio/_io.pyx
@@ -1484,15 +1484,6 @@ cdef infer_field_types(list dtypes):
     return field_types
 
 
-FIFTEEN_MINUTE_DELTA = datetime.timedelta(minutes=15)
-
-cdef int timezone_to_gdal_offset(tz_as_datetime):
-    """Convert to GDAL timezone offset representation.
-    
-    https://gdal.org/development/rfc/rfc56_millisecond_precision.html#core-changes
-    """
-    return tz_as_datetime.utcoffset() / FIFTEEN_MINUTE_DELTA + 100
-
 # TODO: set geometry and field data as memory views?
 def ogr_write(
     str path, str layer, str driver, geometry, fields, field_data, field_mask,
@@ -1830,7 +1821,7 @@ def ogr_write(
                         if tz_array is None:
                             gdal_tz = 0
                         else:
-                            gdal_tz = timezone_to_gdal_offset(tz_array[i])
+                            gdal_tz = tz_array[i]
                         OGR_F_SetFieldDateTimeEx(
                             ogr_feature,
                             field_idx,
diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index 65ae8437..94e47989 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -368,13 +368,14 @@ def write_dataframe(
         if isinstance(ser.dtype, pd.DatetimeTZDtype):
             # Deal with datetimes with timezones by passing down timezone separately
             # pass down naive datetime
-            col = ser.dt.tz_localize(None).values
-            # pandas only supports a single offset per column
-            # access via array since we want a numpy array not a series
-            # (only care about the utc offset, not actually the date)
-            # but ser.array.timetz won't have valid utc offset for pytz time zones
-            # (per https://docs.python.org/3/library/datetime.html#datetime.time.utcoffset) # noqa
-            timezone_cols_metadata[name] = ser.array.to_pydatetime()
+            naive = ser.dt.tz_localize(None)
+            col = naive.values
+            # compute offset relative to UTC explicitly
+            tz_offset = naive - ser.dt.tz_convert("UTC").dt.tz_localize(None)
+            # Convert to GDAL timezone offset representation.
+            # https://gdal.org/development/rfc/rfc56_millisecond_precision.html#core-changes
+            gdal_offset_representation = tz_offset // pd.Timedelta("15m") + 100
+            timezone_cols_metadata[name] = gdal_offset_representation
         else:
             col = ser.values
         if isinstance(col, pd.api.extensions.ExtensionArray):

From d96d67e02b80eae90fa48b37ec2d00988ce20488 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sun, 24 Sep 2023 22:47:35 +1000
Subject: [PATCH 33/51] code review suggestion

---
 pyogrio/tests/test_geopandas_io.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index b25905ea..d6e8518f 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -21,7 +21,6 @@
         assert_index_equal,
         assert_series_equal,
     )
-    import pytz
 
     import geopandas as gp
     from geopandas.array import from_wkt
@@ -154,7 +153,7 @@ def test_read_datetime_tz(test_datetime_tz, tmp_path):
 
     assert_series_equal(df.col, expected)
     # test write and read round trips
-    fpath = tmp_path / "test.gpkg"
+    fpath = tmp_path / "test.geojson"
     write_dataframe(df, fpath)
     df_read = read_dataframe(fpath)
     assert_series_equal(df_read.col, expected)
@@ -163,17 +162,16 @@ def test_read_datetime_tz(test_datetime_tz, tmp_path):
 def test_write_datetime_mixed_offset(tmp_path):
     # Summer Time (GMT+11), standard time (GMT+10)
     dates = ["2023-01-01 11:00:01.111", "2023-06-01 10:00:01.111"]
-    tz = pytz.timezone("Australia/Sydney")
     ser_naive = pd.Series(pd.to_datetime(dates), name="dates")
-    ser_localised = ser_naive.dt.tz_localize(tz)
-    ser_utc = ser_localised.dt.tz_convert(pytz.UTC)
+    ser_localised = ser_naive.dt.tz_localize("Australia/Sydney")
+    ser_utc = ser_localised.dt.tz_convert("UTC")
     if PANDAS_GE_20:
         ser_utc = ser_utc.dt.as_unit("ms")
 
     df = gp.GeoDataFrame(
         {"dates": ser_localised, "geometry": [Point(1, 1), Point(1, 1)]}
     )
-    fpath = tmp_path / "test.gpkg"
+    fpath = tmp_path / "test.geojson"
     write_dataframe(df, fpath)
     df_local = read_dataframe(fpath)
     # GDAL tz only encodes offsets, not timezones, for multiple offsets

From 3eb70dc19c5d8813253cea288a2c3c1596052e5f Mon Sep 17 00:00:00 2001
From: Matt Richards <45483497+m-richards@users.noreply.github.com>
Date: Sun, 24 Sep 2023 22:50:15 +1000
Subject: [PATCH 34/51] Update pyogrio/tests/test_geopandas_io.py

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 pyogrio/tests/test_geopandas_io.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index d6e8518f..770e76dd 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -173,10 +173,10 @@ def test_write_datetime_mixed_offset(tmp_path):
     )
     fpath = tmp_path / "test.geojson"
     write_dataframe(df, fpath)
-    df_local = read_dataframe(fpath)
+    result = read_dataframe(fpath)
     # GDAL tz only encodes offsets, not timezones, for multiple offsets
     # read as utc datetime as otherwise would be read as string
-    assert_series_equal(ser_utc, df_local["dates"])
+    assert_series_equal(result["date"], ser_utc)
 
 
 def test_read_null_values(test_fgdb_vsi):

From 3df12c0f6a77f42b345a8b55827f8c8f9f7e28d4 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Thu, 28 Sep 2023 21:43:31 +1000
Subject: [PATCH 35/51] time tests and suggestions

---
 pyogrio/_io.pyx                               |  3 ++-
 .../tests/fixtures/test_datetime_tz.geojson   |  1 +
 pyogrio/tests/test_geopandas_io.py            | 27 +++++++++++++++----
 3 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx
index 4ce44a46..5ffb1f49 100644
--- a/pyogrio/_io.pyx
+++ b/pyogrio/_io.pyx
@@ -761,7 +761,8 @@ cdef process_fields(
         elif field_type == OFTDateTime or field_type == OFTDate:
             
             if datetime_as_string:
-                # defer datetime parsing to user/ pandas layer 
+                # defer datetime parsing to user/ pandas layer
+                # Update to OGR_F_GetFieldAsISO8601DateTime when GDAL 3.7+ only
                 data[i] = get_string(OGR_F_GetFieldAsString(ogr_feature, field_index), encoding=encoding)
             else:
                 success = OGR_F_GetFieldAsDateTimeEx(
diff --git a/pyogrio/tests/fixtures/test_datetime_tz.geojson b/pyogrio/tests/fixtures/test_datetime_tz.geojson
index 2fb72759..078a61cb 100644
--- a/pyogrio/tests/fixtures/test_datetime_tz.geojson
+++ b/pyogrio/tests/fixtures/test_datetime_tz.geojson
@@ -1,5 +1,6 @@
 {
 "type": "FeatureCollection",
+"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
 "features": [
 { "type": "Feature", "properties": { "col": "2020-01-01T09:00:00.123-05:00" }, "geometry": { "type": "Point", "coordinates": [ 1.0, 1.0 ] } },
 { "type": "Feature", "properties": { "col": "2020-01-01T10:00:00-05:00" }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } }
diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 9d4c5f60..ae4f0780 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -188,10 +188,9 @@ def test_read_datetime_tz(test_datetime_tz, tmp_path):
     else:
         expected = pd.to_datetime(raw_expected)
     expected = pd.Series(expected, name="col")
-
     assert_series_equal(df.col, expected)
     # test write and read round trips
-    fpath = tmp_path / "test.geojson"
+    fpath = tmp_path / "test.gpkg"
     write_dataframe(df, fpath)
     df_read = read_dataframe(fpath)
     assert_series_equal(df_read.col, expected)
@@ -207,14 +206,32 @@ def test_write_datetime_mixed_offset(tmp_path):
         ser_utc = ser_utc.dt.as_unit("ms")
 
     df = gp.GeoDataFrame(
-        {"dates": ser_localised, "geometry": [Point(1, 1), Point(1, 1)]}
+        {"dates": ser_localised, "geometry": [Point(1, 1), Point(1, 1)]},
+        crs="EPSG:4326",
     )
-    fpath = tmp_path / "test.geojson"
+    fpath = tmp_path / "test.gpkg"
     write_dataframe(df, fpath)
     result = read_dataframe(fpath)
     # GDAL tz only encodes offsets, not timezones, for multiple offsets
     # read as utc datetime as otherwise would be read as string
-    assert_series_equal(result["date"], ser_utc)
+    assert_series_equal(result["dates"], ser_utc)
+
+
+def test_read_write_datetime_tz_with_nulls(tmp_path):
+    dates_raw = ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00", pd.NaT]
+    if PANDAS_GE_20:
+        dates = pd.to_datetime(dates_raw, format="ISO8601").as_unit("ms")
+    else:
+        dates = pd.to_datetime(dates_raw)
+    df = gp.GeoDataFrame(
+        {"dates": dates, "geometry": [Point(1, 1), Point(1, 1), Point(1, 1)]},
+        crs="EPSG:4326",
+    )
+    fpath = tmp_path / "test.geojson"
+    write_dataframe(df, fpath)
+    breakpoint()
+    result = read_dataframe(fpath)
+    assert_geodataframe_equal(df, result)
 
 
 def test_read_null_values(test_fgdb_vsi):

From 8fd30a5ca45ef02d335f4202b2fc5cb56531b1b2 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Thu, 28 Sep 2023 21:46:05 +1000
Subject: [PATCH 36/51] remove breakpoint

---
 pyogrio/tests/test_geopandas_io.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index ae4f0780..1832483a 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -229,7 +229,6 @@ def test_read_write_datetime_tz_with_nulls(tmp_path):
     )
     fpath = tmp_path / "test.geojson"
     write_dataframe(df, fpath)
-    breakpoint()
     result = read_dataframe(fpath)
     assert_geodataframe_equal(df, result)
 

From 55293c0765b9259b80a12ef8bdef4527f95bf1bd Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sat, 30 Sep 2023 12:42:53 +1000
Subject: [PATCH 37/51] catch warning

---
 pyogrio/geopandas.py               |  9 ++++++++-
 pyogrio/tests/test_geopandas_io.py | 10 +++++-----
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index 6c9cd219..c024cb0e 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -11,6 +11,7 @@
 )
 from pyogrio.errors import DataSourceError
 from packaging.version import Version
+import warnings
 
 
 try:
@@ -44,7 +45,13 @@ def _try_parse_datetime(ser):
         datetime_kwargs = dict(format="ISO8601", errors="ignore")
     else:
         datetime_kwargs = dict(yearfirst=True)
-    res = pd.to_datetime(ser, **datetime_kwargs)
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore",
+            ".*parsing datetimes with mixed time zones will raise.*",
+            FutureWarning,
+        )
+        res = pd.to_datetime(ser, **datetime_kwargs)
     # if object dtype, try parse as utc instead
     if res.dtype == "object":
         res = pd.to_datetime(ser, utc=True, **datetime_kwargs)
diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 1832483a..bf44ce38 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -190,7 +190,7 @@ def test_read_datetime_tz(test_datetime_tz, tmp_path):
     expected = pd.Series(expected, name="col")
     assert_series_equal(df.col, expected)
     # test write and read round trips
-    fpath = tmp_path / "test.gpkg"
+    fpath = tmp_path / "test.geojson"
     write_dataframe(df, fpath)
     df_read = read_dataframe(fpath)
     assert_series_equal(df_read.col, expected)
@@ -209,7 +209,7 @@ def test_write_datetime_mixed_offset(tmp_path):
         {"dates": ser_localised, "geometry": [Point(1, 1), Point(1, 1)]},
         crs="EPSG:4326",
     )
-    fpath = tmp_path / "test.gpkg"
+    fpath = tmp_path / "test.geojson"
     write_dataframe(df, fpath)
     result = read_dataframe(fpath)
     # GDAL tz only encodes offsets, not timezones, for multiple offsets
@@ -650,7 +650,7 @@ def test_write_read_empty_dataframe_unsupported(tmp_path, ext):
 
 def test_write_dataframe_gpkg_multiple_layers(tmp_path, naturalearth_lowres):
     input_gdf = read_dataframe(naturalearth_lowres)
-    output_path = tmp_path / "test.gpkg"
+    output_path = tmp_path / "test.geojson"
 
     write_dataframe(input_gdf, output_path, layer="first", promote_to_multi=True)
 
@@ -1201,7 +1201,7 @@ def test_metadata_io(tmpdir, naturalearth_lowres, metadata_type):
 
     df = read_dataframe(naturalearth_lowres)
 
-    filename = os.path.join(str(tmpdir), "test.gpkg")
+    filename = os.path.join(str(tmpdir), "test.geojson")
     write_dataframe(df, filename, **{metadata_type: metadata})
 
     metadata_key = "layer_metadata" if metadata_type == "metadata" else metadata_type
@@ -1220,7 +1220,7 @@ def test_metadata_io(tmpdir, naturalearth_lowres, metadata_type):
 )
 def test_invalid_metadata(tmpdir, naturalearth_lowres, metadata_type, metadata):
     with pytest.raises(ValueError, match="must be a string"):
-        filename = os.path.join(str(tmpdir), "test.gpkg")
+        filename = os.path.join(str(tmpdir), "test.geojson")
         write_dataframe(
             read_dataframe(naturalearth_lowres), filename, **{metadata_type: metadata}
         )

From 8040c21a836e08f00f8a30cfb3c26c276584ce86 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sat, 30 Sep 2023 13:25:37 +1000
Subject: [PATCH 38/51] really need to fix my local gdal

---
 pyogrio/tests/test_geopandas_io.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index bf44ce38..dc48e535 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -190,7 +190,7 @@ def test_read_datetime_tz(test_datetime_tz, tmp_path):
     expected = pd.Series(expected, name="col")
     assert_series_equal(df.col, expected)
     # test write and read round trips
-    fpath = tmp_path / "test.geojson"
+    fpath = tmp_path / "test.gpkg"
     write_dataframe(df, fpath)
     df_read = read_dataframe(fpath)
     assert_series_equal(df_read.col, expected)
@@ -209,7 +209,7 @@ def test_write_datetime_mixed_offset(tmp_path):
         {"dates": ser_localised, "geometry": [Point(1, 1), Point(1, 1)]},
         crs="EPSG:4326",
     )
-    fpath = tmp_path / "test.geojson"
+    fpath = tmp_path / "test.gpkg"
     write_dataframe(df, fpath)
     result = read_dataframe(fpath)
     # GDAL tz only encodes offsets, not timezones, for multiple offsets
@@ -227,7 +227,7 @@ def test_read_write_datetime_tz_with_nulls(tmp_path):
         {"dates": dates, "geometry": [Point(1, 1), Point(1, 1), Point(1, 1)]},
         crs="EPSG:4326",
     )
-    fpath = tmp_path / "test.geojson"
+    fpath = tmp_path / "test.gpkg"
     write_dataframe(df, fpath)
     result = read_dataframe(fpath)
     assert_geodataframe_equal(df, result)
@@ -650,7 +650,7 @@ def test_write_read_empty_dataframe_unsupported(tmp_path, ext):
 
 def test_write_dataframe_gpkg_multiple_layers(tmp_path, naturalearth_lowres):
     input_gdf = read_dataframe(naturalearth_lowres)
-    output_path = tmp_path / "test.geojson"
+    output_path = tmp_path / "test.gpkg"
 
     write_dataframe(input_gdf, output_path, layer="first", promote_to_multi=True)
 
@@ -706,7 +706,7 @@ def test_write_dataframe_gdal_options_unknown(tmp_path, naturalearth_lowres):
     df = read_dataframe(naturalearth_lowres)
 
     # geojson has no spatial index, so passing keyword should raise
-    outfilename = tmp_path / "test.geojson"
+    outfilename = tmp_path / "test.gpkg"
     with pytest.raises(ValueError, match="unrecognized option 'SPATIAL_INDEX'"):
         write_dataframe(df, outfilename, spatial_index=True)
 
@@ -864,7 +864,7 @@ def test_write_dataframe_promote_to_multi_layer_geom_type_invalid(
 def test_write_dataframe_layer_geom_type_invalid(tmp_path, naturalearth_lowres):
     df = read_dataframe(naturalearth_lowres)
 
-    filename = tmp_path / "test.geojson"
+    filename = tmp_path / "test.gpkg"
     with pytest.raises(
         GeometryError, match="Geometry type is not supported: NotSupported"
     ):
@@ -1201,7 +1201,7 @@ def test_metadata_io(tmpdir, naturalearth_lowres, metadata_type):
 
     df = read_dataframe(naturalearth_lowres)
 
-    filename = os.path.join(str(tmpdir), "test.geojson")
+    filename = os.path.join(str(tmpdir), "test.gpkg")
     write_dataframe(df, filename, **{metadata_type: metadata})
 
     metadata_key = "layer_metadata" if metadata_type == "metadata" else metadata_type
@@ -1220,7 +1220,7 @@ def test_metadata_io(tmpdir, naturalearth_lowres, metadata_type):
 )
 def test_invalid_metadata(tmpdir, naturalearth_lowres, metadata_type, metadata):
     with pytest.raises(ValueError, match="must be a string"):
-        filename = os.path.join(str(tmpdir), "test.geojson")
+        filename = os.path.join(str(tmpdir), "test.gpkg")
         write_dataframe(
             read_dataframe(naturalearth_lowres), filename, **{metadata_type: metadata}
         )
@@ -1230,7 +1230,7 @@ def test_invalid_metadata(tmpdir, naturalearth_lowres, metadata_type, metadata):
 def test_metadata_unsupported(tmpdir, naturalearth_lowres, metadata_type):
     """metadata is silently ignored"""
 
-    filename = os.path.join(str(tmpdir), "test.geojson")
+    filename = os.path.join(str(tmpdir), "test.gpkg")
     write_dataframe(
         read_dataframe(naturalearth_lowres),
         filename,

From fccc8fb680ac4d7f3349bee30cf2c6a4ebe9a30c Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sat, 30 Sep 2023 18:19:18 +1000
Subject: [PATCH 39/51] fix fix

---
 pyogrio/tests/test_geopandas_io.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index dc48e535..aeca6563 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -706,7 +706,7 @@ def test_write_dataframe_gdal_options_unknown(tmp_path, naturalearth_lowres):
     df = read_dataframe(naturalearth_lowres)
 
     # geojson has no spatial index, so passing keyword should raise
-    outfilename = tmp_path / "test.gpkg"
+    outfilename = tmp_path / "test.geosjon"
     with pytest.raises(ValueError, match="unrecognized option 'SPATIAL_INDEX'"):
         write_dataframe(df, outfilename, spatial_index=True)
 
@@ -864,7 +864,7 @@ def test_write_dataframe_promote_to_multi_layer_geom_type_invalid(
 def test_write_dataframe_layer_geom_type_invalid(tmp_path, naturalearth_lowres):
     df = read_dataframe(naturalearth_lowres)
 
-    filename = tmp_path / "test.gpkg"
+    filename = tmp_path / "test.geosjon"
     with pytest.raises(
         GeometryError, match="Geometry type is not supported: NotSupported"
     ):
@@ -1230,7 +1230,7 @@ def test_invalid_metadata(tmpdir, naturalearth_lowres, metadata_type, metadata):
 def test_metadata_unsupported(tmpdir, naturalearth_lowres, metadata_type):
     """metadata is silently ignored"""
 
-    filename = os.path.join(str(tmpdir), "test.gpkg")
+    filename = os.path.join(str(tmpdir), "test.geojson")
     write_dataframe(
         read_dataframe(naturalearth_lowres),
         filename,

From 200cc1d74f573b22dadb637cdd2496ef97f1c442 Mon Sep 17 00:00:00 2001
From: Matt Richards <45483497+m-richards@users.noreply.github.com>
Date: Sat, 30 Sep 2023 19:48:41 +1000
Subject: [PATCH 40/51] Apply suggestions from code review

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 pyogrio/tests/test_geopandas_io.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index aeca6563..96ec5354 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -706,7 +706,7 @@ def test_write_dataframe_gdal_options_unknown(tmp_path, naturalearth_lowres):
     df = read_dataframe(naturalearth_lowres)
 
     # geojson has no spatial index, so passing keyword should raise
-    outfilename = tmp_path / "test.geosjon"
+    outfilename = tmp_path / "test.geojson"
     with pytest.raises(ValueError, match="unrecognized option 'SPATIAL_INDEX'"):
         write_dataframe(df, outfilename, spatial_index=True)
 
@@ -864,7 +864,7 @@ def test_write_dataframe_promote_to_multi_layer_geom_type_invalid(
 def test_write_dataframe_layer_geom_type_invalid(tmp_path, naturalearth_lowres):
     df = read_dataframe(naturalearth_lowres)
 
-    filename = tmp_path / "test.geosjon"
+    filename = tmp_path / "test.geojson"
     with pytest.raises(
         GeometryError, match="Geometry type is not supported: NotSupported"
     ):

From ebfc01ce6fcf9475c418be35283bcaedbaaa17c6 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sat, 30 Sep 2023 20:17:41 +1000
Subject: [PATCH 41/51] add suggested exception handling

---
 pyogrio/geopandas.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index c024cb0e..20f09ff3 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -51,7 +51,12 @@ def _try_parse_datetime(ser):
             ".*parsing datetimes with mixed time zones will raise.*",
             FutureWarning,
         )
-        res = pd.to_datetime(ser, **datetime_kwargs)
+        # pre-emptive try catch for when pandas will raise
+        # (can tighten the exception type in future when it does)
+        try:
+            res = pd.to_datetime(ser, **datetime_kwargs)
+        except Exception:
+            pass
     # if object dtype, try parse as utc instead
     if res.dtype == "object":
         res = pd.to_datetime(ser, utc=True, **datetime_kwargs)

From c8c186a6e25a8856e531dd9311d3cc32fbdae430 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sat, 7 Oct 2023 15:38:07 +1100
Subject: [PATCH 42/51] move pandas compat to _compat

---
 pyogrio/_compat.py   |  7 +++++++
 pyogrio/geopandas.py | 12 +-----------
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/pyogrio/_compat.py b/pyogrio/_compat.py
index 596abeb0..9e932f3f 100644
--- a/pyogrio/_compat.py
+++ b/pyogrio/_compat.py
@@ -18,11 +18,18 @@
 except ImportError:
     geopandas = None
 
+try:
+    import pandas
+except ImportError:
+    pandas = None
+
 
 HAS_ARROW_API = __gdal_version__ >= (3, 6, 0) and pyarrow is not None
 
 HAS_GEOPANDAS = geopandas is not None
 
+PANDAS_GE_20 = pandas is not None and Version(pandas.__version__) >= Version("2.0.0")
+
 HAS_GDAL_GEOS = __gdal_geos_version__ is not None
 
 HAS_SHAPELY = shapely is not None and Version(shapely.__version__) >= Version("2.0.0")
diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index 20f09ff3..2f9f24d4 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-from pyogrio._compat import HAS_GEOPANDAS
+from pyogrio._compat import HAS_GEOPANDAS, PANDAS_GE_20
 from pyogrio.raw import (
     DRIVERS_NO_MIXED_SINGLE_MULTI,
     DRIVERS_NO_MIXED_DIMENSIONS,
@@ -10,19 +10,9 @@
     write,
 )
 from pyogrio.errors import DataSourceError
-from packaging.version import Version
 import warnings
 
 
-try:
-    import pandas
-
-    PANDAS_GE_20 = Version(pandas.__version__) >= Version("2.0.0")
-
-except ImportError:
-    PANDAS_GE_20 = None
-
-
 def _stringify_path(path):
     """
     Convert path-like to a string if possible, pass-through other objects

From 95030c0aa927cd1bfac29176f6a2e9423048a9ed Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sat, 7 Oct 2023 16:17:27 +1100
Subject: [PATCH 43/51] address review comments

---
 pyogrio/geopandas.py                          | 43 +++++++++++--------
 pyogrio/raw.py                                |  6 ++-
 .../tests/fixtures/test_datetime_tz.geojson   |  4 +-
 pyogrio/tests/test_geopandas_io.py            | 24 +++++------
 pyogrio/tests/test_raw_io.py                  |  2 +-
 5 files changed, 46 insertions(+), 33 deletions(-)

diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index 2f9f24d4..45ee5240 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -52,6 +52,9 @@ def _try_parse_datetime(ser):
         res = pd.to_datetime(ser, utc=True, **datetime_kwargs)
 
     if res.dtype != "object":
+        # GDAL only supports ms precision, convert outputs to match.
+        # Pandas 2.0 supports datetime[ms] directly, prior versions only support [ns],
+        # Instead, round the values to [ms] precision.
         if PANDAS_GE_20:
             res = res.dt.as_unit("ms")
         else:
@@ -202,6 +205,9 @@ def read_dataframe(
 
     read_func = read_arrow if use_arrow else read
     if not use_arrow:
+        # For arrow, datetimes are read as is.
+        # For numpy IO, datetimes are read as string values to preserve timezone info
+        # as numpy does not directly support timezones.
         kwargs["datetime_as_string"] = True
     result = read_func(
         path_or_buffer,
@@ -392,34 +398,37 @@ def write_dataframe(
     field_data = []
     field_mask = []
     # dict[str, np.array(datetime.datetime)] special case for dt-tz fields
-    timezone_cols_metadata = {}
+    gdal_tz_offsets = {}
     for name in fields:
-        ser = df[name]
-        col = ser.values
-        if isinstance(ser.dtype, pd.DatetimeTZDtype):
+        col = df[name]
+        values = col.values
+        if isinstance(col.dtype, pd.DatetimeTZDtype):
             # Deal with datetimes with timezones by passing down timezone separately
             # pass down naive datetime
-            naive = ser.dt.tz_localize(None)
-            col = naive.values
+            naive = col.dt.tz_localize(None)
+            values = naive.values
             # compute offset relative to UTC explicitly
-            tz_offset = naive - ser.dt.tz_convert("UTC").dt.tz_localize(None)
+            tz_offset = naive - col.dt.tz_convert("UTC").dt.tz_localize(None)
             # Convert to GDAL timezone offset representation.
+            # GMT is represented as 100 and offsets are represented by adding /
+            # subtracting 1 for every 15 minutes different from GMT.
             # https://gdal.org/development/rfc/rfc56_millisecond_precision.html#core-changes
+            # Convert each row offset to a signed multiple of 15m and add to GMT value
             gdal_offset_representation = tz_offset // pd.Timedelta("15m") + 100
-            timezone_cols_metadata[name] = gdal_offset_representation
+            gdal_tz_offsets[name] = gdal_offset_representation
         else:
-            col = ser.values
-        if isinstance(col, pd.api.extensions.ExtensionArray):
+            values = col.values
+        if isinstance(values, pd.api.extensions.ExtensionArray):
             from pandas.arrays import IntegerArray, FloatingArray, BooleanArray
 
-            if isinstance(col, (IntegerArray, FloatingArray, BooleanArray)):
-                field_data.append(col._data)
-                field_mask.append(col._mask)
+            if isinstance(values, (IntegerArray, FloatingArray, BooleanArray)):
+                field_data.append(values._data)
+                field_mask.append(values._mask)
             else:
-                field_data.append(np.asarray(col))
-                field_mask.append(np.asarray(col.isna()))
+                field_data.append(np.asarray(values))
+                field_mask.append(np.asarray(values.isna()))
         else:
-            field_data.append(col)
+            field_data.append(values)
             field_mask.append(None)
 
     # Determine geometry_type and/or promote_to_multi
@@ -514,6 +523,6 @@ def write_dataframe(
         metadata=metadata,
         dataset_options=dataset_options,
         layer_options=layer_options,
-        timezone_cols_metadata=timezone_cols_metadata,
+        timezone_cols_metadata=gdal_tz_offsets,
         **kwargs,
     )
diff --git a/pyogrio/raw.py b/pyogrio/raw.py
index 7b0b0cb5..bad2d28e 100644
--- a/pyogrio/raw.py
+++ b/pyogrio/raw.py
@@ -129,7 +129,8 @@ def read(
         If True, will return the FIDs of the feature that were read.
     datetime_as_string : bool, optional (default: False)
         If True, will return datetime dtypes as detected by GDAL as a string
-        array, instead of a datetime64 array (used to extract timezone info).
+        array (which can be used to extract timezone info), instead of
+        a datetime64 array.
 
     **kwargs
         Additional driver-specific dataset open options passed to OGR.  Invalid
@@ -148,6 +149,7 @@ def read(
         Meta is: {
             "crs": "<crs>",
             "fields": <ndarray of field names>,
+            "dtypes": <ndarray of numpy dtypes corresponding to fields>
             "encoding": "<encoding>",
             "geometry_type": "<geometry type>"
         }
@@ -386,6 +388,8 @@ def write(
     timezone_cols_metadata=None,
     **kwargs,
 ):
+    # if dtypes is given, remove it from kwargs (dtypes is included in meta returned by
+    # read, and it is convenient to pass meta directly into write for round trip tests)
     kwargs.pop("dtypes", None)
     path = vsi_path(str(path))
 
diff --git a/pyogrio/tests/fixtures/test_datetime_tz.geojson b/pyogrio/tests/fixtures/test_datetime_tz.geojson
index 078a61cb..e6b39206 100644
--- a/pyogrio/tests/fixtures/test_datetime_tz.geojson
+++ b/pyogrio/tests/fixtures/test_datetime_tz.geojson
@@ -2,7 +2,7 @@
 "type": "FeatureCollection",
 "crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
 "features": [
-{ "type": "Feature", "properties": { "col": "2020-01-01T09:00:00.123-05:00" }, "geometry": { "type": "Point", "coordinates": [ 1.0, 1.0 ] } },
-{ "type": "Feature", "properties": { "col": "2020-01-01T10:00:00-05:00" }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } }
+{ "type": "Feature", "properties": { "datetime_col": "2020-01-01T09:00:00.123-05:00" }, "geometry": { "type": "Point", "coordinates": [ 1.0, 1.0 ] } },
+{ "type": "Feature", "properties": { "datetime_col": "2020-01-01T10:00:00-05:00" }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } }
 ]
 }
diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 96ec5354..3f6cf718 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -187,34 +187,34 @@ def test_read_datetime_tz(test_datetime_tz, tmp_path):
         expected = pd.to_datetime(raw_expected, format="ISO8601").as_unit("ms")
     else:
         expected = pd.to_datetime(raw_expected)
-    expected = pd.Series(expected, name="col")
-    assert_series_equal(df.col, expected)
+    expected = pd.Series(expected, name="datetime_col")
+    assert_series_equal(df.datetime_col, expected)
     # test write and read round trips
     fpath = tmp_path / "test.gpkg"
     write_dataframe(df, fpath)
     df_read = read_dataframe(fpath)
-    assert_series_equal(df_read.col, expected)
+    assert_series_equal(df_read.datetime_col, expected)
 
 
 def test_write_datetime_mixed_offset(tmp_path):
-    # Summer Time (GMT+11), standard time (GMT+10)
+    # Australian Summer Time AEDT (GMT+11), Standard Time AEST (GMT+10)
     dates = ["2023-01-01 11:00:01.111", "2023-06-01 10:00:01.111"]
-    ser_naive = pd.Series(pd.to_datetime(dates), name="dates")
-    ser_localised = ser_naive.dt.tz_localize("Australia/Sydney")
-    ser_utc = ser_localised.dt.tz_convert("UTC")
+    naive_col = pd.Series(pd.to_datetime(dates), name="dates")
+    localised_col = naive_col.dt.tz_localize("Australia/Sydney")
+    utc_col = localised_col.dt.tz_convert("UTC")
     if PANDAS_GE_20:
-        ser_utc = ser_utc.dt.as_unit("ms")
+        utc_col = utc_col.dt.as_unit("ms")
 
     df = gp.GeoDataFrame(
-        {"dates": ser_localised, "geometry": [Point(1, 1), Point(1, 1)]},
+        {"dates": localised_col, "geometry": [Point(1, 1), Point(1, 1)]},
         crs="EPSG:4326",
     )
     fpath = tmp_path / "test.gpkg"
     write_dataframe(df, fpath)
     result = read_dataframe(fpath)
-    # GDAL tz only encodes offsets, not timezones, for multiple offsets
-    # read as utc datetime as otherwise would be read as string
-    assert_series_equal(result["dates"], ser_utc)
+    # GDAL tz only encodes offsets, not timezones
+    # check multiple offsets are read as utc datetime instead of string values
+    assert_series_equal(result["dates"], utc_col)
 
 
 def test_read_write_datetime_tz_with_nulls(tmp_path):
diff --git a/pyogrio/tests/test_raw_io.py b/pyogrio/tests/test_raw_io.py
index a53d9d8c..acbeebcf 100644
--- a/pyogrio/tests/test_raw_io.py
+++ b/pyogrio/tests/test_raw_io.py
@@ -775,7 +775,7 @@ def test_read_datetime_millisecond(test_datetime):
     assert field[1] == np.datetime64("2020-01-01 10:00:00.000")
 
 
-def test_read_datetime_tz(test_datetime_tz):
+def test_read_datetime_as_string(test_datetime_tz):
     field = read(test_datetime_tz)[3][0]
     assert field.dtype == "datetime64[ms]"
     # timezone is ignored in numpy layer

From 086e52e4bad829207a55ac1e3ee4547626122e9c Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sat, 7 Oct 2023 16:49:29 +1100
Subject: [PATCH 44/51] update known issues

---
 docs/source/known_issues.md | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/docs/source/known_issues.md b/docs/source/known_issues.md
index 3d84215e..1b0b463c 100644
--- a/docs/source/known_issues.md
+++ b/docs/source/known_issues.md
@@ -55,16 +55,23 @@ with obscure error messages.
 ## Support for reading and writing DateTimes
 
 GDAL only supports datetimes at a millisecond resolution. Reading data will thus
-give at most millisecond resolution (`datetime64[ms]` data type), even though
-the data is cast `datetime64[ns]` data type when reading into a data frame
-using `pyogrio.read_dataframe()`. When writing, only precision up to ms is retained.
+give at most millisecond resolution (`datetime64[ms]` data type). With pandas 2.0
+`pyogrio.read_dataframe()` will return datetime data as `datetime64[ms]` 
+correspondingly. For previous versions of pandas, `datetime64[ns]` is used as 
+ms precision was not supported. When writing, only precision up to 
+ms is retained.
 
 Not all file formats have dedicated support to store datetime data, like ESRI
 Shapefile. For such formats, or if you require precision > ms, a workaround is to
 convert the datetimes to string.
 
-Timezone information is ignored at the moment, both when reading and when writing
-datetime columns.
+Timezone information is preserved where possible, however GDAL only represents
+time zones as UTC offsets, whilst pandas uses IANA time zones (via `pytz` or 
+`zoneinfo`). This means that dataframes with columns containing multiple offsets 
+e.g. when switching from standard time to summer time will be written correctly,
+but when read via `pyogrio.read_dataframe()` will be returned in UTC time, as
+there is no way to reconstruct the original timezone from the individual offsets
+present.
 
 ## Support for OpenStreetMap (OSM) data
 

From 2b2dd5fba7a996762b569891b6d28e059b75300f Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Sat, 7 Oct 2023 16:50:52 +1100
Subject: [PATCH 45/51] reword

---
 docs/source/known_issues.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/source/known_issues.md b/docs/source/known_issues.md
index 1b0b463c..8f351f4e 100644
--- a/docs/source/known_issues.md
+++ b/docs/source/known_issues.md
@@ -68,10 +68,10 @@ convert the datetimes to string.
 Timezone information is preserved where possible, however GDAL only represents
 time zones as UTC offsets, whilst pandas uses IANA time zones (via `pytz` or 
 `zoneinfo`). This means that dataframes with columns containing multiple offsets 
-e.g. when switching from standard time to summer time will be written correctly,
-but when read via `pyogrio.read_dataframe()` will be returned in UTC time, as
-there is no way to reconstruct the original timezone from the individual offsets
-present.
+(e.g. when switching from standard time to summer time) will be written correctly,
+but when read via `pyogrio.read_dataframe()` will be returned as a UTC datetime 
+column, as there is no way to reconstruct the original timezone from the individual 
+offsets present.
 
 ## Support for OpenStreetMap (OSM) data
 

From 2167d0fc6ceadc5c4f9aaba06df19276c5445052 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Tue, 17 Oct 2023 21:45:45 +1100
Subject: [PATCH 46/51] move documentation

---
 docs/source/introduction.md | 21 +++++++++++++++++++++
 docs/source/known_issues.md | 21 ---------------------
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/docs/source/introduction.md b/docs/source/introduction.md
index 3fd0c324..e7ac8cf1 100644
--- a/docs/source/introduction.md
+++ b/docs/source/introduction.md
@@ -464,6 +464,27 @@ You can also read from a URL with this syntax:
 >>> read_dataframe("zip+https://s3.amazonaws.com/bucket/shapefile.zip")
 ```
 
+## Reading and writing DateTimes
+
+GDAL only supports datetimes at a millisecond resolution. Reading data will thus
+give at most millisecond resolution (`datetime64[ms]` data type). With pandas 2.0
+`pyogrio.read_dataframe()` will return datetime data as `datetime64[ms]` 
+correspondingly. For previous versions of pandas, `datetime64[ns]` is used as 
+ms precision was not supported. When writing, only precision up to 
+ms is retained.
+
+Not all file formats have dedicated support to store datetime data, like ESRI
+Shapefile. For such formats, or if you require precision > ms, a workaround is to
+convert the datetimes to string.
+
+Timezone information is preserved where possible, however GDAL only represents
+time zones as UTC offsets, whilst pandas uses IANA time zones (via `pytz` or 
+`zoneinfo`). This means that dataframes with columns containing multiple offsets 
+(e.g. when switching from standard time to summer time) will be written correctly,
+but when read via `pyogrio.read_dataframe()` will be returned as a UTC datetime 
+column, as there is no way to reconstruct the original timezone from the individual 
+offsets present.
+
 ## Dataset and layer creation options
 
 It is possible to use dataset and layer creation options available for a given
diff --git a/docs/source/known_issues.md b/docs/source/known_issues.md
index 8f351f4e..29d390ee 100644
--- a/docs/source/known_issues.md
+++ b/docs/source/known_issues.md
@@ -52,27 +52,6 @@ Pyogrio does not currently validate attribute values or geometry types before
 attempting to write to the output file. Invalid types may crash during writing
 with obscure error messages.
 
-## Support for reading and writing DateTimes
-
-GDAL only supports datetimes at a millisecond resolution. Reading data will thus
-give at most millisecond resolution (`datetime64[ms]` data type). With pandas 2.0
-`pyogrio.read_dataframe()` will return datetime data as `datetime64[ms]` 
-correspondingly. For previous versions of pandas, `datetime64[ns]` is used as 
-ms precision was not supported. When writing, only precision up to 
-ms is retained.
-
-Not all file formats have dedicated support to store datetime data, like ESRI
-Shapefile. For such formats, or if you require precision > ms, a workaround is to
-convert the datetimes to string.
-
-Timezone information is preserved where possible, however GDAL only represents
-time zones as UTC offsets, whilst pandas uses IANA time zones (via `pytz` or 
-`zoneinfo`). This means that dataframes with columns containing multiple offsets 
-(e.g. when switching from standard time to summer time) will be written correctly,
-but when read via `pyogrio.read_dataframe()` will be returned as a UTC datetime 
-column, as there is no way to reconstruct the original timezone from the individual 
-offsets present.
-
 ## Support for OpenStreetMap (OSM) data
 
 OpenStreetMap data do not natively support calculating the feature count by data

From ab0fbf6ec3ba4bceb141a4732910a6f6f09e0c16 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Tue, 17 Oct 2023 21:46:50 +1100
Subject: [PATCH 47/51] rename field as suggested

---
 pyogrio/_io.pyx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx
index 747873cc..8376bda7 100644
--- a/pyogrio/_io.pyx
+++ b/pyogrio/_io.pyx
@@ -1665,7 +1665,7 @@ def ogr_write(
     str crs, str geometry_type, str encoding, object dataset_kwargs,
     object layer_kwargs, bint promote_to_multi=False, bint nan_as_null=True,
     bint append=False, dataset_metadata=None, layer_metadata=None,
-    timezone_cols_metadata=None
+    gdal_tz_offsets=None
 ):
     cdef const char *path_c = NULL
     cdef const char *layer_c = NULL
@@ -1736,8 +1736,8 @@ def ogr_write(
     if not layer:
         layer = os.path.splitext(os.path.split(path)[1])[0]
 
-    if timezone_cols_metadata is None:
-        timezone_cols_metadata = {}
+    if gdal_tz_offsets is None:
+        gdal_tz_offsets = {}
 
 
     # if shapefile, GeoJSON, or FlatGeobuf, always delete first
@@ -2012,7 +2012,7 @@ def ogr_write(
                         OGR_F_SetFieldNull(ogr_feature, field_idx)
                     else:
                         datetime = field_value.astype("datetime64[ms]").item()
-                        tz_array = timezone_cols_metadata.get(fields[field_idx], None)
+                        tz_array = gdal_tz_offsets.get(fields[field_idx], None)
                         if tz_array is None:
                             gdal_tz = 0
                         else:

From 0f02115ce02b8eb46e2b04830029e3a9dc220813 Mon Sep 17 00:00:00 2001
From: Matt Richards <mrichards7@outlook.com.au>
Date: Wed, 18 Oct 2023 08:54:14 +1100
Subject: [PATCH 48/51] final missing gdal tz offset change

---
 pyogrio/geopandas.py | 2 +-
 pyogrio/raw.py       | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index 9c94d099..fcab92ac 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -561,6 +561,6 @@ def write_dataframe(
         metadata=metadata,
         dataset_options=dataset_options,
         layer_options=layer_options,
-        timezone_cols_metadata=gdal_tz_offsets,
+        gdal_tz_offsets=gdal_tz_offsets,
         **kwargs,
     )
diff --git a/pyogrio/raw.py b/pyogrio/raw.py
index 3aed010d..4384ee9a 100644
--- a/pyogrio/raw.py
+++ b/pyogrio/raw.py
@@ -456,7 +456,7 @@ def write(
     metadata=None,
     dataset_options=None,
     layer_options=None,
-    timezone_cols_metadata=None,
+    gdal_tz_offsets=None,
     **kwargs,
 ):
     # if dtypes is given, remove it from kwargs (dtypes is included in meta returned by
@@ -545,5 +545,5 @@ def write(
         layer_metadata=layer_metadata,
         dataset_kwargs=dataset_kwargs,
         layer_kwargs=layer_kwargs,
-        timezone_cols_metadata=timezone_cols_metadata,
+        gdal_tz_offsets=gdal_tz_offsets,
     )

From 52a922d5827996822d8d7583395f502317693ca4 Mon Sep 17 00:00:00 2001
From: Matt Richards <45483497+m-richards@users.noreply.github.com>
Date: Wed, 18 Oct 2023 08:55:19 +1100
Subject: [PATCH 49/51] Update pyogrio/tests/test_geopandas_io.py

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 pyogrio/tests/test_geopandas_io.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
index 96981999..1c7ddfc5 100644
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -174,8 +174,10 @@ def test_read_layer_invalid(naturalearth_lowres_all_ext, use_arrow):
 
 
 @pytest.mark.filterwarnings("ignore: Measured")
-def test_read_datetime(test_fgdb_vsi):
-    df = read_dataframe(test_fgdb_vsi, layer="test_lines", max_features=1)
+def test_read_datetime(test_fgdb_vsi, use_arrow):
+    df = read_dataframe(
+        test_fgdb_vsi, layer="test_lines", use_arrow=use_arrow, max_features=1
+    )
     if PANDAS_GE_20:
         # starting with pandas 2.0, it preserves the passed datetime resolution
         assert df.SURVEY_DAT.dtype.name == "datetime64[ms]"

From 7c99e51f397cd3e2e210908eb8f0eda8db1a4fd5 Mon Sep 17 00:00:00 2001
From: Matt Richards <45483497+m-richards@users.noreply.github.com>
Date: Wed, 18 Oct 2023 08:59:17 +1100
Subject: [PATCH 50/51] Apply suggestions from code review

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 pyogrio/geopandas.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py
index fcab92ac..a0f30f3a 100644
--- a/pyogrio/geopandas.py
+++ b/pyogrio/geopandas.py
@@ -435,11 +435,10 @@ def write_dataframe(
     # TODO: may need to fill in pd.NA, etc
     field_data = []
     field_mask = []
-    # dict[str, np.array(datetime.datetime)] special case for dt-tz fields
+    # dict[str, np.array(int)] special case for dt-tz fields
     gdal_tz_offsets = {}
     for name in fields:
         col = df[name]
-        values = col.values
         if isinstance(col.dtype, pd.DatetimeTZDtype):
             # Deal with datetimes with timezones by passing down timezone separately
             # pass down naive datetime

From a5f5f9db089f34b6bc3549fac48b1af4184eaf8e Mon Sep 17 00:00:00 2001
From: Brendan Ward <bcward@astutespruce.com>
Date: Thu, 19 Oct 2023 19:56:12 -0700
Subject: [PATCH 51/51] add changelog entry

---
 CHANGES.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGES.md b/CHANGES.md
index 664172d8..5b5c5caf 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -4,6 +4,7 @@
 
 ### Improvements
 
+-   Support reading and writing datetimes with timezones (#253).
 -   Support writing dataframes without geometry column (#267).
 -   Calculate feature count by iterating over features if GDAL returns an
     unknown count for a data layer (e.g., OSM driver); this may have signficant