diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 5275e5c098845..2bccff1062ea4 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -130,9 +130,6 @@ def get_result(self): return DataFrame(values, index=index, columns=columns) def get_new_values(self): - return self._reshape_values(self.values) - - def _reshape_values(self, values): values = self.values # place the values length, width = self.full_shape @@ -148,7 +145,7 @@ def _reshape_values(self, values): new_values.fill(np.nan) # is there a simpler / faster way of doing this? - for i in xrange(self.values.shape[1]): + for i in xrange(values.shape[1]): chunk = new_values[:, i * width : (i + 1) * width] mask_chunk = new_mask[:, i * width : (i + 1) * width] @@ -200,6 +197,8 @@ def get_new_index(self): return new_index + + def pivot(self, index=None, columns=None, values=None): """ See DataFrame.pivot diff --git a/pandas/core/series.py b/pandas/core/series.py index c8ee264f7b2f3..6e11a6ab88d2d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -839,7 +839,8 @@ def copy(self, order='C'): ------- cp : Series """ - return Series(self.values.copy(order), index=self.index, name=self.name) + return Series(self.values.copy(order), index=self.index, + name=self.name) def to_dict(self): """ diff --git a/pandas/tseries/__init__.py b/pandas/tseries/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tseries/tests/__init__.py b/pandas/tseries/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tseries/tests/test_tools.py b/pandas/tseries/tests/test_tools.py new file mode 100644 index 0000000000000..b828dbff1eff5 --- /dev/null +++ b/pandas/tseries/tests/test_tools.py @@ -0,0 +1,64 @@ +import nose +import unittest + +import numpy as np + +from pandas import Series, date_range +import pandas.util.testing as tm + +from pandas.tseries.tools import convert_to_annual, isleapyear + +class 
TestConvertAnnual(unittest.TestCase): + """ + New pandas implementation of scikits.timeseries convert_to_annual + """ + def test_daily(self): + rng = date_range('1/1/2000', '12/31/2004', freq='D') + ts = Series(np.random.randn(len(rng)), index=rng) + + annual = convert_to_annual(ts, 'D') + + doy = ts.index.dayofyear + doy[(-isleapyear(ts.index.year)) & (doy >= 60)] += 1 + + for i in range(1, 367): + subset = ts[doy == i] + subset.index = [x.year for x in subset.index] + + tm.assert_series_equal(annual[i].dropna(), subset) + + # check leap days + leaps = ts[(ts.index.month == 2) & (ts.index.day == 29)] + day = leaps.index.dayofyear[0] + leaps.index = leaps.index.year + tm.assert_series_equal(annual[day].dropna(), leaps) + + def test_weekly(self): + pass + + def test_monthly(self): + rng = date_range('1/1/2000', '12/31/2004', freq='M') + ts = Series(np.random.randn(len(rng)), index=rng) + + annual = convert_to_annual(ts, 'M') + + month = ts.index.month + + for i in range(1, 13): + subset = ts[month == i] + subset.index = [x.year for x in subset.index] + tm.assert_series_equal(annual[i].dropna(), subset) + + def test_interval_monthly(self): + pass + + def test_interval_daily(self): + pass + + def test_interval_weekly(self): + pass + +if __name__ == '__main__': + nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + exit=False) + diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py new file mode 100644 index 0000000000000..ac665bb772f28 --- /dev/null +++ b/pandas/tseries/tools.py @@ -0,0 +1,85 @@ +from pandas.core.frame import DataFrame +import pandas.core.nanops as nanops + +import numpy as np + +def convert_to_annual(series, freq=None): + """ + Group a series by years, taking leap years into account. + + The output has as many rows as distinct years in the original series, + and as many columns as the length of a leap year in the units corresponding + to the original frequency (366 for daily frequency, 366*24 for hourly...). 
+ The first column of the output corresponds to Jan. 1st, 00:00:00, + while the last column corresponds to Dec. 31st, 23:59:59. + Entries corresponding to Feb. 29th are masked for non-leap years. + + For example, if the initial series has a daily frequency, the 59th column + of the output always corresponds to Feb. 28th, the 61st column to Mar. 1st, + and the 60th column is masked for non-leap years. + With an hourly initial frequency, the (59*24)th column of the output always + corresponds to Feb. 28th 23:00, the (61*24)th column to Mar. 1st, 00:00, and + the 24 columns between (59*24) and (61*24) are masked. + + If the original frequency is less than daily, the output is equivalent to + ``series.convert('A', func=None)``. + + Parameters + ---------- + series : TimeSeries + freq : string or None, default None + + + Returns + ------- + annual : DataFrame + """ + index = series.index + year = index.year + years = nanops.unique1d(year) + + if freq is not None: + freq = freq.upper() + + if freq == 'D': + width = 366 + offset = index.dayofyear - 1 + + # adjust for leap year + offset[(-isleapyear(year)) & (offset >= 59)] += 1 + + columns = range(1, 367) + # todo: strings like 1/1, 1/25, etc.? + elif freq in ('M', 'BM'): + width = 12 + offset = index.month - 1 + columns = range(1, 13) + else: + raise NotImplementedError(freq) + + flat_index = (year - years.min()) * width + offset + + values = np.empty((len(years), width), dtype=series.dtype) + + if not np.issubdtype(series.dtype, np.integer): + values.fill(np.nan) + else: + raise Exception('need to upcast') + + values.put(flat_index, series.values) + + return DataFrame(values, index=years, columns=columns) + +def isleapyear(year): + """ + Returns true if year is a leap year. + + Parameters + ---------- + year : integer / sequence + A given (list of) year(s). + """ + year = np.asarray(year) + return np.logical_or(year % 400 == 0, + np.logical_and(year % 4 == 0, year % 100 > 0)) +