Skip to content

Commit

Permalink
ENH: initial version of convert_to_annual for pandas, #736
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Apr 6, 2012
1 parent 570a03a commit 5b4be0b
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 5 deletions.
7 changes: 3 additions & 4 deletions pandas/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,6 @@ def get_result(self):
return DataFrame(values, index=index, columns=columns)

def get_new_values(self):
return self._reshape_values(self.values)

def _reshape_values(self, values):
values = self.values
# place the values
length, width = self.full_shape
Expand All @@ -148,7 +145,7 @@ def _reshape_values(self, values):
new_values.fill(np.nan)

# is there a simpler / faster way of doing this?
for i in xrange(self.values.shape[1]):
for i in xrange(values.shape[1]):
chunk = new_values[:, i * width : (i + 1) * width]
mask_chunk = new_mask[:, i * width : (i + 1) * width]

Expand Down Expand Up @@ -200,6 +197,8 @@ def get_new_index(self):

return new_index



def pivot(self, index=None, columns=None, values=None):
"""
See DataFrame.pivot
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -839,7 +839,8 @@ def copy(self, order='C'):
-------
cp : Series
"""
return Series(self.values.copy(order), index=self.index, name=self.name)
return Series(self.values.copy(order), index=self.index,
name=self.name)

def to_dict(self):
"""
Expand Down
Empty file added pandas/tseries/__init__.py
Empty file.
Empty file.
64 changes: 64 additions & 0 deletions pandas/tseries/tests/test_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import nose
import unittest

import numpy as np

from pandas import Series, date_range
import pandas.util.testing as tm

from pandas.tseries.tools import convert_to_annual, isleapyear

class TestConvertAnnual(unittest.TestCase):
    """
    Tests for convert_to_annual, the pandas counterpart of the
    scikits.timeseries function of the same name.
    """

    def test_daily(self):
        """Daily data is laid out on a 366-column, leap-year-aligned grid."""
        rng = date_range('1/1/2000', '12/31/2004', freq='D')
        ts = Series(np.random.randn(len(rng)), index=rng)

        annual = convert_to_annual(ts, 'D')

        # Take a private copy: the adjustment below is done in place and must
        # not write into whatever array the index hands back.
        doy = np.array(ts.index.dayofyear)

        # In non-leap years everything from Mar. 1st onwards moves one column
        # to the right, leaving column 60 (Feb. 29th) empty. Boolean masks
        # are inverted with ``~``; unary minus on a boolean array is an error
        # in current NumPy.
        doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1

        for i in range(1, 367):
            subset = ts[doy == i]
            subset.index = [x.year for x in subset.index]

            tm.assert_series_equal(annual[i].dropna(), subset)

        # check leap days
        leaps = ts[(ts.index.month == 2) & (ts.index.day == 29)]
        day = leaps.index.dayofyear[0]
        leaps.index = leaps.index.year
        tm.assert_series_equal(annual[day].dropna(), leaps)

    def test_weekly(self):
        # TODO: not implemented yet
        pass

    def test_monthly(self):
        """Monthly data is laid out on a 12-column grid, one per month."""
        rng = date_range('1/1/2000', '12/31/2004', freq='M')
        ts = Series(np.random.randn(len(rng)), index=rng)

        annual = convert_to_annual(ts, 'M')

        month = ts.index.month

        for i in range(1, 13):
            subset = ts[month == i]
            subset.index = [x.year for x in subset.index]
            tm.assert_series_equal(annual[i].dropna(), subset)

    def test_interval_monthly(self):
        # TODO: not implemented yet
        pass

    def test_interval_daily(self):
        # TODO: not implemented yet
        pass

    def test_interval_weekly(self):
        # TODO: not implemented yet
        pass

if __name__ == '__main__':
    # Run this module's tests through nose when executed as a script:
    # verbose output, stop at the first failure, and drop into the debugger
    # on errors and failures.
    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_argv, exit=False)

85 changes: 85 additions & 0 deletions pandas/tseries/tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
from pandas.core.frame import DataFrame
import pandas.core.nanops as nanops

import numpy as np

def convert_to_annual(series, freq=None):
    """
    Group a series by years, taking leap years into account.

    The output has as many rows as there are distinct years in the index of
    the original series, and as many columns as there are periods in a leap
    year at the given frequency (366 for daily data, 12 for monthly data).
    The first column corresponds to the start of the year and the last
    column to its end.

    For daily data, entries corresponding to Feb. 29th are left missing for
    non-leap years: the column labelled 59 always corresponds to Feb. 28th,
    the column labelled 61 to Mar. 1st, and the column labelled 60 is NaN
    for non-leap years.

    Only daily and monthly frequencies are implemented; finer frequencies
    (e.g. hourly) raise NotImplementedError.

    Parameters
    ----------
    series : TimeSeries
        Its index must expose ``year``, ``month`` and ``dayofyear``.
    freq : string or None, default None
        Frequency of the data, case-insensitive: 'D' for daily, 'M' or 'BM'
        for monthly.

    Returns
    -------
    annual : DataFrame
        One row per distinct year (sorted ascending), columns labelled
        1..366 for daily data or 1..12 for monthly data. Integer input is
        upcast to float64 (and boolean input to object) so that missing
        slots can hold NaN.

    Raises
    ------
    NotImplementedError
        If ``freq`` is None or is not one of the supported frequencies.
    """
    index = series.index

    # Copy the date fields into plain ndarrays so that the in-place leap-year
    # adjustment below can never write into data owned by the index.
    year = np.array(index.year)

    if freq is not None:
        freq = freq.upper()

    if freq == 'D':
        width = 366
        offset = np.array(index.dayofyear) - 1

        # adjust for leap year: in non-leap years, shift Mar. 1st onwards one
        # slot to the right so that the Feb. 29th slot (zero-based position
        # 59) stays empty. Boolean masks are inverted with ``~``; unary minus
        # on a boolean array is an error in current NumPy.
        offset[(~isleapyear(year)) & (offset >= 59)] += 1
        # todo: strings like 1/1, 1/25, etc.?
    elif freq in ('M', 'BM'):
        width = 12
        offset = np.array(index.month) - 1
    else:
        raise NotImplementedError(freq)

    columns = range(1, width + 1)

    # The row of each observation is the rank of its year among the distinct
    # years. Using (year - years.min()) instead would point past the end of
    # the buffer whenever the index skips a year.
    years, row = np.unique(year, return_inverse=True)

    # NaN marks the empty slots, so the buffer needs a dtype able to hold it.
    dtype = series.dtype
    if np.issubdtype(dtype, np.integer):
        dtype = np.float64
    elif np.issubdtype(dtype, np.bool_):
        dtype = np.object_

    values = np.empty((len(years), width), dtype=dtype)
    values.fill(np.nan)
    values[row, offset] = series.values

    return DataFrame(values, index=years, columns=columns)

def isleapyear(year):
    """
    Flag which of the given years are leap years.

    A year counts as a leap year when it is divisible by 400, or when it is
    divisible by 4 but not by 100.

    Parameters
    ----------
    year : integer / sequence
        A single year or a collection of years.

    Returns
    -------
    Boolean result of the element-wise check, computed with NumPy.
    """
    yr = np.asarray(year)
    divisible_by_400 = yr % 400 == 0
    divisible_by_4_not_100 = np.logical_and(yr % 4 == 0, yr % 100 > 0)
    return np.logical_or(divisible_by_400, divisible_by_4_not_100)

0 comments on commit 5b4be0b

Please sign in to comment.