-
-
Notifications
You must be signed in to change notification settings - Fork 18.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ENH: initial version of convert_to_annual for pandas, #736
- Loading branch information
Showing
6 changed files
with
154 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import nose | ||
import unittest | ||
|
||
import numpy as np | ||
|
||
from pandas import Series, date_range | ||
import pandas.util.testing as tm | ||
|
||
from pandas.tseries.tools import convert_to_annual, isleapyear | ||
|
||
class TestConvertAnnual(unittest.TestCase):
    """
    Tests for convert_to_annual, the pandas counterpart of the
    scikits.timeseries function of the same name.
    """

    def test_daily(self):
        """Daily data lands in columns 1..366, with column 60 = Feb. 29th."""
        rng = date_range('1/1/2000', '12/31/2004', freq='D')
        ts = Series(np.random.randn(len(rng)), index=rng)

        annual = convert_to_annual(ts, 'D')

        # Expected column of every observation: its day of year, with
        # Mar. 1st onwards pushed one column to the right in non-leap years
        # so that column 60 stays reserved for Feb. 29th.  The shift is
        # applied to a copy so the index attribute itself is never written.
        doy = np.array(ts.index.dayofyear)
        # ``~`` (not unary ``-``) is the supported way to invert a boolean
        # mask; recent NumPy versions reject ``-`` on boolean arrays.
        doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1

        for i in range(1, 367):
            subset = ts[doy == i]
            subset.index = [x.year for x in subset.index]

            tm.assert_series_equal(annual[i].dropna(), subset)

        # check leap days
        leaps = ts[(ts.index.month == 2) & (ts.index.day == 29)]
        day = leaps.index.dayofyear[0]
        leaps.index = leaps.index.year
        tm.assert_series_equal(annual[day].dropna(), leaps)

    def test_weekly(self):
        # Placeholder: no weekly test yet.
        pass

    def test_monthly(self):
        """Monthly data lands in columns 1..12, one row per year."""
        rng = date_range('1/1/2000', '12/31/2004', freq='M')
        ts = Series(np.random.randn(len(rng)), index=rng)

        annual = convert_to_annual(ts, 'M')

        month = ts.index.month

        for i in range(1, 13):
            subset = ts[month == i]
            subset.index = [x.year for x in subset.index]
            tm.assert_series_equal(annual[i].dropna(), subset)

    def test_interval_monthly(self):
        # Placeholder: no test yet.
        pass

    def test_interval_daily(self):
        # Placeholder: no test yet.
        pass

    def test_interval_weekly(self):
        # Placeholder: no test yet.
        pass
|
||
if __name__ == '__main__':
    # Run this module's tests through nose when executed as a script;
    # exit=False keeps nose from calling sys.exit() afterwards.
    nose.runmodule(
        argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
        exit=False
    )
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
from pandas.core.frame import DataFrame | ||
import pandas.core.nanops as nanops | ||
|
||
import numpy as np | ||
|
||
def convert_to_annual(series, freq=None):
    """
    Pivot a time series into a year-by-period table.

    Every distinct calendar year found in ``series.index`` becomes one row
    of the result and every period within a year becomes one column:

    * ``freq='D'``: 366 columns labelled 1..366.  Column 60 always stands
      for Feb. 29th, so it is left missing for non-leap years; Feb. 28th is
      always column 59 and Mar. 1st is always column 61.
    * ``freq='M'`` or ``'BM'``: 12 columns labelled 1..12, one per month.

    Cells without a matching observation hold NaN.  Integer input is upcast
    to float64 so that NaN can be stored.

    Parameters
    ----------
    series : TimeSeries
        Series whose index provides ``year``, ``month`` and ``dayofyear``.
    freq : string or None, default None
        Frequency of `series` (case-insensitive).  Only 'D', 'M' and 'BM'
        are implemented.

    Returns
    -------
    annual : DataFrame
        One row per distinct year (sorted ascending), columns as described
        above.

    Raises
    ------
    NotImplementedError
        If `freq` is None or any frequency other than 'D', 'M' or 'BM'.
    """
    index = series.index
    year = np.asarray(index.year)

    # Sorted distinct years plus, for every observation, the position of its
    # year among them.  Using this mapping rather than ``year - min(year)``
    # keeps rows inside the table and aligned with their labels even when
    # some years are absent or the series is not in chronological order.
    years, row = np.unique(year, return_inverse=True)

    if freq is not None:
        freq = freq.upper()

    if freq == 'D':
        width = 366
        # Fresh ndarray, so the in-place shift below never touches the index.
        offset = np.asarray(index.dayofyear) - 1

        # adjust for leap year: in non-leap years move Mar. 1st onwards one
        # slot to the right, leaving slot 59 (column 60) for Feb. 29th.
        # ``~`` is used because NumPy does not support unary ``-`` on
        # boolean arrays.
        offset[(~isleapyear(year)) & (offset >= 59)] += 1
        # todo: strings like 1/1, 1/25, etc.?
    elif freq in ('M', 'BM'):
        width = 12
        offset = np.asarray(index.month) - 1
    else:
        raise NotImplementedError(freq)

    columns = range(1, width + 1)

    # Position of every observation in the flattened (years x width) table.
    flat_index = row * width + offset

    # Integer arrays cannot represent NaN, so upcast them to float.
    if np.issubdtype(series.dtype, np.integer):
        dtype = np.float64
    else:
        dtype = series.dtype

    values = np.empty((len(years), width), dtype=dtype)
    values.fill(np.nan)
    values.put(flat_index, series.values)

    return DataFrame(values, index=years, columns=columns)
|
||
def isleapyear(year):
    """
    Tell which of the given years are leap years.

    A year counts as a leap year when it is a multiple of 400, or a multiple
    of 4 that is not a multiple of 100.

    Parameters
    ----------
    year : integer / sequence
        A single year or a sequence of years.

    Returns
    -------
    Boolean result with the same shape as `year` once converted to an array
    (a boolean scalar for a single year).
    """
    yr = np.asarray(year)
    every_400 = yr % 400 == 0
    every_4 = yr % 4 == 0
    not_century = yr % 100 > 0
    return every_400 | (every_4 & not_century)
|