Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: read DateTime fields with millisecond accuracy #111

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions docs/source/known_issues.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,12 @@ with obscure error messages.

Date fields are not yet fully supported. These will be supported in a future
release.

## Support for reading and writing DateTimes

Currently only reading datetime values is supported.

GDAL only supports datetimes at a millisecond resolution. Reading data will thus
give at most millisecond resolution (`datetime64[ms]` data type), even though
the data is cast to the `datetime64[ns]` data type when reading into a data frame
using `pyogrio.read_dataframe()`.
42 changes: 24 additions & 18 deletions pyogrio/_io.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import datetime
import locale
import logging
import math
import os
import warnings

Expand All @@ -31,20 +32,20 @@ log = logging.getLogger(__name__)
# Mapping of OGR integer field types to Python field type names
# (index in array is the integer field type)
FIELD_TYPES = [
'int32', # OFTInteger, Simple 32bit integer
None, # OFTIntegerList, List of 32bit integers, not supported
'float64', # OFTReal, Double Precision floating point
None, # OFTRealList, List of doubles, not supported
'object', # OFTString, String of UTF-8 chars
None, # OFTStringList, Array of strings, not supported
None, # OFTWideString, deprecated, not supported
None, # OFTWideStringList, deprecated, not supported
'object', # OFTBinary, Raw Binary data
'datetime64[D]',# OFTDate, Date
None, # OFTTime, Time, NOTE: not directly supported in numpy
'datetime64[s]',# OFTDateTime, Date and Time
'int64', # OFTInteger64, Single 64bit integer
None # OFTInteger64List, List of 64bit integers, not supported
'int32', # OFTInteger, Simple 32bit integer
None, # OFTIntegerList, List of 32bit integers, not supported
'float64', # OFTReal, Double Precision floating point
None, # OFTRealList, List of doubles, not supported
'object', # OFTString, String of UTF-8 chars
None, # OFTStringList, Array of strings, not supported
None, # OFTWideString, deprecated, not supported
None, # OFTWideStringList, deprecated, not supported
'object', # OFTBinary, Raw Binary data
'datetime64[D]', # OFTDate, Date
None, # OFTTime, Time, NOTE: not directly supported in numpy
'datetime64[ms]',# OFTDateTime, Date and Time
'int64', # OFTInteger64, Single 64bit integer
None # OFTInteger64List, List of 64bit integers, not supported
]

FIELD_SUBTYPES = {
Expand Down Expand Up @@ -516,7 +517,7 @@ cdef process_fields(
cdef int day = 0
cdef int hour = 0
cdef int minute = 0
cdef int second = 0
cdef float fsecond = 0.0
cdef int timezone = 0

for j in range(n_fields):
Expand Down Expand Up @@ -562,8 +563,13 @@ cdef process_fields(
data[i] = bin_value[:ret_length]

elif field_type == OFTDateTime or field_type == OFTDate:
success = OGR_F_GetFieldAsDateTime(
ogr_feature, field_index, &year, &month, &day, &hour, &minute, &second, &timezone)
success = OGR_F_GetFieldAsDateTimeEx(
ogr_feature, field_index, &year, &month, &day, &hour, &minute, &fsecond, &timezone)

ms, ss = math.modf(fsecond)
second = int(ss)
# fsecond has millisecond accuracy
microsecond = round(ms * 1000) * 1000

if not success:
data[i] = np.datetime64('NaT')
Expand All @@ -572,7 +578,7 @@ cdef process_fields(
data[i] = datetime.date(year, month, day).isoformat()

elif field_type == OFTDateTime:
data[i] = datetime.datetime(year, month, day, hour, minute, second).isoformat()
data[i] = datetime.datetime(year, month, day, hour, minute, second, microsecond).isoformat()


@cython.boundscheck(False) # Deactivate bounds checking
Expand Down
2 changes: 1 addition & 1 deletion pyogrio/_ogr.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ cdef extern from "ogr_api.h":
int64_t OGR_F_GetFID(OGRFeatureH feature)
OGRGeometryH OGR_F_GetGeometryRef(OGRFeatureH feature)
GByte* OGR_F_GetFieldAsBinary(OGRFeatureH feature, int n, int *s)
int OGR_F_GetFieldAsDateTime(OGRFeatureH feature, int n, int *y, int *m, int *d, int *h, int *m, int *s, int *z)
int OGR_F_GetFieldAsDateTimeEx(OGRFeatureH feature, int n, int *y, int *m, int *d, int *h, int *m, float *s, int *z)
double OGR_F_GetFieldAsDouble(OGRFeatureH feature, int n)
int OGR_F_GetFieldAsInteger(OGRFeatureH feature, int n)
long OGR_F_GetFieldAsInteger64(OGRFeatureH feature, int n)
Expand Down
5 changes: 5 additions & 0 deletions pyogrio/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,8 @@ def test_gpkg_nulls():
@pytest.fixture(scope="session")
def test_ogr_types_list():
return _data_dir / "test_ogr_types_list.geojson"


@pytest.fixture(scope="session")
def test_datetime():
    """Return the path of the ``test_datetime.geojson`` file under ``_data_dir``."""
    fixture_name = "test_datetime.geojson"
    return _data_dir / fixture_name
7 changes: 7 additions & 0 deletions pyogrio/tests/fixtures/test_datetime.geojson
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"type": "FeatureCollection",
"features": [
{ "type": "Feature", "properties": { "col": "2020-01-01T09:00:00.123" }, "geometry": { "type": "Point", "coordinates": [ 1.0, 1.0 ] } },
{ "type": "Feature", "properties": { "col": "2020-01-01T10:00:00" }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } }
]
}
7 changes: 7 additions & 0 deletions pyogrio/tests/test_raw_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,3 +443,10 @@ def test_read_unsupported_types(test_ogr_types_list):

fields = read(test_ogr_types_list, columns=["int64"])[3]
assert len(fields) == 1


def test_read_datetime_millisecond(test_datetime):
    """Check that datetime values are read with a ``datetime64[ms]`` dtype.

    The fixture file has a single property column whose first value carries
    a fractional second (``.123``) and whose second value does not.
    """
    # Element 3 of the tuple returned by read() is indexed per column;
    # the first entry is the only column in the fixture.
    columns = read(test_datetime)[3]
    values = columns[0]

    assert values.dtype == "datetime64[ms]"

    expected = ("2020-01-01 09:00:00.123", "2020-01-01 10:00:00.000")
    for position, stamp in enumerate(expected):
        assert values[position] == np.datetime64(stamp)