From 31d63c965b3d8aeca382fce504d825a937282692 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 2 Jun 2022 09:44:25 +0200 Subject: [PATCH 1/4] ENH: read DateTime fields with millisecond accuracy --- pyogrio/_io.pyx | 42 ++++++++++++++++++++++++------------------ pyogrio/_ogr.pxd | 2 +- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 43037a14..527fdd90 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -7,6 +7,7 @@ import datetime import locale import logging +import math import os import warnings @@ -31,20 +32,20 @@ log = logging.getLogger(__name__) # Mapping of OGR integer field types to Python field type names # (index in array is the integer field type) FIELD_TYPES = [ - 'int32', # OFTInteger, Simple 32bit integer - None, # OFTIntegerList, List of 32bit integers, not supported - 'float64', # OFTReal, Double Precision floating point - None, # OFTRealList, List of doubles, not supported - 'object', # OFTString, String of UTF-8 chars - None, # OFTStringList, Array of strings, not supported - None, # OFTWideString, deprecated, not supported - None, # OFTWideStringList, deprecated, not supported - 'object', # OFTBinary, Raw Binary data - 'datetime64[D]',# OFTDate, Date - None, # OFTTime, Time, NOTE: not directly supported in numpy - 'datetime64[s]',# OFTDateTime, Date and Time - 'int64', # OFTInteger64, Single 64bit integer - None # OFTInteger64List, List of 64bit integers, not supported + 'int32', # OFTInteger, Simple 32bit integer + None, # OFTIntegerList, List of 32bit integers, not supported + 'float64', # OFTReal, Double Precision floating point + None, # OFTRealList, List of doubles, not supported + 'object', # OFTString, String of UTF-8 chars + None, # OFTStringList, Array of strings, not supported + None, # OFTWideString, deprecated, not supported + None, # OFTWideStringList, deprecated, not supported + 'object', # OFTBinary, Raw Binary data + 'datetime64[D]', # OFTDate, Date + None, # 
OFTTime, Time, NOTE: not directly supported in numpy + 'datetime64[us]',# OFTDateTime, Date and Time + 'int64', # OFTInteger64, Single 64bit integer + None # OFTInteger64List, List of 64bit integers, not supported ] FIELD_SUBTYPES = { @@ -508,7 +509,7 @@ cdef process_fields( cdef int day = 0 cdef int hour = 0 cdef int minute = 0 - cdef int second = 0 + cdef float fsecond = 0.0 cdef int timezone = 0 for j in range(n_fields): @@ -554,8 +555,13 @@ cdef process_fields( data[i] = bin_value[:ret_length] elif field_type == OFTDateTime or field_type == OFTDate: - success = OGR_F_GetFieldAsDateTime( - ogr_feature, field_index, &year, &month, &day, &hour, &minute, &second, &timezone) + success = OGR_F_GetFieldAsDateTimeEx( + ogr_feature, field_index, &year, &month, &day, &hour, &minute, &fsecond, &timezone) + + ms, ss = math.modf(fsecond) + second = int(ss) + # fsecond has millisecond accuracy + microsecond = round(ms * 1000) * 1000 if not success: data[i] = np.datetime64('NaT') @@ -564,7 +570,7 @@ cdef process_fields( data[i] = datetime.date(year, month, day).isoformat() elif field_type == OFTDateTime: - data[i] = datetime.datetime(year, month, day, hour, minute, second).isoformat() + data[i] = datetime.datetime(year, month, day, hour, minute, second, microsecond).isoformat() @cython.boundscheck(False) # Deactivate bounds checking diff --git a/pyogrio/_ogr.pxd b/pyogrio/_ogr.pxd index 5f9a7f6b..f638c045 100644 --- a/pyogrio/_ogr.pxd +++ b/pyogrio/_ogr.pxd @@ -197,7 +197,7 @@ cdef extern from "ogr_api.h": int64_t OGR_F_GetFID(OGRFeatureH feature) OGRGeometryH OGR_F_GetGeometryRef(OGRFeatureH feature) GByte* OGR_F_GetFieldAsBinary(OGRFeatureH feature, int n, int *s) - int OGR_F_GetFieldAsDateTime(OGRFeatureH feature, int n, int *y, int *m, int *d, int *h, int *m, int *s, int *z) + int OGR_F_GetFieldAsDateTimeEx(OGRFeatureH feature, int n, int *y, int *m, int *d, int *h, int *m, float *s, int *z) double OGR_F_GetFieldAsDouble(OGRFeatureH feature, int n) int 
OGR_F_GetFieldAsInteger(OGRFeatureH feature, int n) long OGR_F_GetFieldAsInteger64(OGRFeatureH feature, int n) From b90d626070b542f1170253f8a55d8b67d4b74b4a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 3 Jun 2022 08:33:31 +0200 Subject: [PATCH 2/4] us -> ms --- pyogrio/_io.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 527fdd90..b5ac971b 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -43,7 +43,7 @@ FIELD_TYPES = [ 'object', # OFTBinary, Raw Binary data 'datetime64[D]', # OFTDate, Date None, # OFTTime, Time, NOTE: not directly supported in numpy - 'datetime64[us]',# OFTDateTime, Date and Time + 'datetime64[ms]',# OFTDateTime, Date and Time 'int64', # OFTInteger64, Single 64bit integer None # OFTInteger64List, List of 64bit integers, not supported ] From 03a4a8a6fa5f4d69150c089abaf91ba7c979f3bb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 3 Jun 2022 08:46:21 +0200 Subject: [PATCH 3/4] add small test --- pyogrio/tests/conftest.py | 5 +++++ pyogrio/tests/fixtures/test_datetime.geojson | 7 +++++++ pyogrio/tests/test_raw_io.py | 7 +++++++ 3 files changed, 19 insertions(+) create mode 100644 pyogrio/tests/fixtures/test_datetime.geojson diff --git a/pyogrio/tests/conftest.py b/pyogrio/tests/conftest.py index 4d88f2ad..734cbb59 100644 --- a/pyogrio/tests/conftest.py +++ b/pyogrio/tests/conftest.py @@ -80,3 +80,8 @@ def test_gpkg_nulls(): @pytest.fixture(scope="session") def test_ogr_types_list(): return _data_dir / "test_ogr_types_list.geojson" + + +@pytest.fixture(scope="session") +def test_datetime(): + return _data_dir / "test_datetime.geojson" diff --git a/pyogrio/tests/fixtures/test_datetime.geojson b/pyogrio/tests/fixtures/test_datetime.geojson new file mode 100644 index 00000000..eb949330 --- /dev/null +++ b/pyogrio/tests/fixtures/test_datetime.geojson @@ -0,0 +1,7 @@ +{ +"type": "FeatureCollection", +"features": [ +{ "type": "Feature", "properties": { "col": 
"2020-01-01T09:00:00.123" }, "geometry": { "type": "Point", "coordinates": [ 1.0, 1.0 ] } }, +{ "type": "Feature", "properties": { "col": "2020-01-01T10:00:00" }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } } +] +} diff --git a/pyogrio/tests/test_raw_io.py b/pyogrio/tests/test_raw_io.py index 0edcdf13..679efd9b 100644 --- a/pyogrio/tests/test_raw_io.py +++ b/pyogrio/tests/test_raw_io.py @@ -443,3 +443,10 @@ def test_read_unsupported_types(test_ogr_types_list): fields = read(test_ogr_types_list, columns=["int64"])[3] assert len(fields) == 1 + + +def test_read_datetime_millisecond(test_datetime): + field = read(test_datetime)[3][0] + assert field.dtype == "datetime64[ms]" + assert field[0] == np.datetime64("2020-01-01 09:00:00.123") + assert field[1] == np.datetime64("2020-01-01 10:00:00.000") From 09e8942bd35fd1661786ed0d50c5d665a30d4beb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 4 Jun 2022 12:10:14 +0200 Subject: [PATCH 4/4] add note to known_issues --- docs/source/known_issues.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/source/known_issues.md b/docs/source/known_issues.md index 9ca8319e..9e59936b 100644 --- a/docs/source/known_issues.md +++ b/docs/source/known_issues.md @@ -54,3 +54,12 @@ with obscure error messages. Date fields are not yet fully supported. These will be supported in a future release. + +## Support for reading and writing DateTimes + +Currently only reading datetime values is supported. + +GDAL only supports datetimes at a millisecond resolution. Reading data will thus +give at most millisecond resolution (`datetime64[ms]` data type), even though +the data is cast to the `datetime64[ns]` data type when reading into a data frame +using `pyogrio.read_dataframe()`.