Skip to content

Commit

Permalink
Merge pull request pandas-dev#11628 from jreback/hdf
Browse files Browse the repository at this point in the history
ENH: Implement export of datetime64[ns, tz] dtypes with a fixed HDF5 pandas-dev#11411
  • Loading branch information
jreback committed Nov 17, 2015
2 parents 91407ff + addcec2 commit b54f2db
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 47 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.17.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ Enhancements
pd.Index([1, np.nan, 3]).fillna(2)

- ``pivot_table`` now has a ``margins_name`` argument so you can use something other than the default of 'All' (:issue:`3335`)
- Implement export of ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`11411`)

.. _whatsnew_0171.api:

Expand Down Expand Up @@ -159,4 +160,3 @@ Bug Fixes
- Bug in ``DataFrame.join()`` with ``how='right'`` producing a ``TypeError`` (:issue:`11519`)
- Bug in ``Series.quantile`` with empty list results has ``Index`` with ``object`` dtype (:issue:`11588`)
- Bug in ``pd.merge`` results in empty ``Int64Index`` rather than ``Index(dtype=object)`` when the merge result is empty (:issue:`11588`)

94 changes: 56 additions & 38 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
# versioning attribute
_version = '0.15.2'

### encoding ###
# PY3 encoding if we don't specify
_default_encoding = 'UTF-8'

Expand All @@ -64,22 +65,8 @@ def _ensure_encoding(encoding):
encoding = _default_encoding
return encoding

def _set_tz(values, tz, preserve_UTC=False):
""" set the timezone if values are an Index """
if tz is not None and isinstance(values, Index):
tz = _ensure_decoded(tz)
if values.tz is None:
values = values.tz_localize('UTC').tz_convert(tz)
if preserve_UTC:
if tslib.get_timezone(tz) == 'UTC':
values = list(values)

return values


Term = Expr


def _ensure_term(where, scope_level):
"""
ensure that the where is a Term or a list of Term
Expand Down Expand Up @@ -1947,14 +1934,11 @@ def set_atom_datetime64tz(self, block, info, values=None):
if values is None:
values = block.values

# convert this column to datetime64[ns] utc, and save the tz
values = values.tz_convert('UTC').values.view('i8').reshape(block.shape)
# convert this column to i8 in UTC, and save the tz
values = values.asi8.reshape(block.shape)

# store a converted timezone
zone = tslib.get_timezone(block.values.tz)
if zone is None:
zone = tslib.tot_seconds(block.values.tz.utcoffset())
self.tz = zone
self.tz = _get_tz(block.values.tz)
self.update_info(info)

self.kind = 'datetime64'
Expand Down Expand Up @@ -2015,18 +1999,9 @@ def convert(self, values, nan_rep, encoding):

# reverse converts
if dtype == u('datetime64'):
# recreate the timezone
if self.tz is not None:

# data should be 2-dim here
# we stored as utc, so just set the tz

index = DatetimeIndex(
self.data.ravel(), tz='UTC').tz_convert(tslib.maybe_get_tz(self.tz))
self.data = index

else:
self.data = np.asarray(self.data, dtype='M8[ns]')
# recreate with tz if indicated
self.data = _set_tz(self.data, self.tz, coerce=True)

elif dtype == u('timedelta64'):
self.data = np.asarray(self.data, dtype='m8[ns]')
Expand Down Expand Up @@ -2347,7 +2322,10 @@ def read_array(self, key):
ret = data

if dtype == u('datetime64'):
ret = np.asarray(ret, dtype='M8[ns]')

# reconstruct a timezone if indicated
ret = _set_tz(ret, getattr(attrs, 'tz', None), coerce=True)

elif dtype == u('timedelta64'):
ret = np.asarray(ret, dtype='m8[ns]')

Expand Down Expand Up @@ -2397,10 +2375,7 @@ def write_index(self, key, index):
node._v_attrs.freq = index.freq

if hasattr(index, 'tz') and index.tz is not None:
zone = tslib.get_timezone(index.tz)
if zone is None:
zone = tslib.tot_seconds(index.tz.utcoffset())
node._v_attrs.tz = zone
node._v_attrs.tz = _get_tz(index.tz)

def write_block_index(self, key, index):
self.write_array('%s_blocs' % key, index.blocs)
Expand Down Expand Up @@ -2574,11 +2549,20 @@ def write_array(self, key, value, items=None):
if empty_array:
self.write_array_empty(key, value)
else:
if value.dtype.type == np.datetime64:
if com.is_datetime64_dtype(value.dtype):
self._handle.create_array(self.group, key, value.view('i8'))
getattr(
self.group, key)._v_attrs.value_type = 'datetime64'
elif value.dtype.type == np.timedelta64:
elif com.is_datetime64tz_dtype(value.dtype):
# store as UTC
# with a zone
self._handle.create_array(self.group, key,
value.asi8)

node = getattr(self.group, key)
node._v_attrs.tz = _get_tz(value.tz)
node._v_attrs.value_type = 'datetime64'
elif com.is_timedelta64_dtype(value.dtype):
self._handle.create_array(self.group, key, value.view('i8'))
getattr(
self.group, key)._v_attrs.value_type = 'timedelta64'
Expand Down Expand Up @@ -4248,6 +4232,40 @@ def _get_info(info, name):
idx = info[name] = dict()
return idx

### tz to/from coercion ###
def _get_tz(tz):
    """
    Return the encoded form of a timezone for storage.

    The result is whatever ``tslib.get_timezone`` yields for ``tz``; when
    that is None, the zone's ``utcoffset()`` run through
    ``tslib.tot_seconds`` is returned instead.
    """
    encoded = tslib.get_timezone(tz)
    if encoded is not None:
        return encoded

    # no zone identifier available: fall back to the offset
    return tslib.tot_seconds(tz.utcoffset())

def _set_tz(values, tz, preserve_UTC=False, coerce=False):
"""
coerce the values to a DatetimeIndex if tz is set
preserve the input shape if possible
Parameters
----------
values : ndarray
tz : string/pickled tz object
preserve_UTC : boolean,
preserve the UTC of the result
coerce : if we do not have a passed timezone, coerce to M8[ns] ndarray
"""
if tz is not None:
values = values.ravel()
tz = tslib.get_timezone(_ensure_decoded(tz))
values = DatetimeIndex(values)
if values.tz is None:
values = values.tz_localize('UTC').tz_convert(tz)
if preserve_UTC:
if tz == 'UTC':
values = list(values)
elif coerce:
values = np.asarray(values, dtype='M8[ns]')

return values

def _convert_index(index, encoding=None, format_type=None):
index_name = getattr(index, 'name', None)
Expand Down
27 changes: 19 additions & 8 deletions pandas/io/tests/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4909,15 +4909,26 @@ def test_tseries_select_index_column(self):
result = store.select_column('frame', 'index')
self.assertEqual(rng.tz, result.dt.tz)

def test_timezones(self):
rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern')
frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

def test_timezones_fixed(self):
with ensure_clean_store(self.path) as store:
store['frame'] = frame
recons = store['frame']
self.assertTrue(recons.index.equals(rng))
self.assertEqual(rng.tz, recons.index.tz)

# index
rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern')
df = DataFrame(np.random.randn(len(rng), 4), index=rng)
store['df'] = df
result = store['df']
assert_frame_equal(result, df)

# as data
# GH11411
_maybe_remove(store, 'df')
df = DataFrame({'A' : rng,
'B' : rng.tz_convert('UTC').tz_localize(None),
'C' : rng.tz_convert('CET'),
'D' : range(len(rng))}, index=rng)
store['df'] = df
result = store['df']
assert_frame_equal(result, df)

def test_fixed_offset_tz(self):
rng = date_range('1/1/2000 00:00:00-07:00', '1/30/2000 00:00:00-07:00')
Expand Down

0 comments on commit b54f2db

Please sign in to comment.