forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* commit 'v0.4.1': (53 commits) RLS: Version 0.4.1 BUG: use int64 BUG: reverted Series constructor NumPy < 1.6 bug TST: wrap up test coverage TST: test coverage, minor refactoring TST: test coverage and minor bugfix in NDFrame.swaplevel DOC: documented reading CSV/table into MultiIndex, address GH pandas-dev#165 DOC: documented swaplevel, address GH pandas-dev#150 ENH: better JR join function ENH: add join panel function for testing and later integration BUG: do not allow appending with different item order ENH: don't raise exception when calling remove on non-existent node ENH: tinkering with other join impl ENH: speed up assert_almost_equal BUG: DateRange.copy did not produce well-formed object. fixes GH pandas-dev#168 DOC: update release notes BUG: count_level did not handle zero-length data case, caused segfault with NumPy < 1.6 for some. Fixes GH pandas-dev#169 ENH: sped up inner/outer_join_indexer cython functions ENH: don't boundscheck or wraparound ENH: bug fixes, speed enh, benchmark suite to compare with xts ...
- Loading branch information
Showing
37 changed files
with
1,720 additions
and
227 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
# reasonably efficient
|
||
def create_panels_append(cls, panels):
    """Append several panels along the major axis and return one Panel.

    Parameters
    ----------
    cls : unused by this function
    panels : iterable of Panel or None
        ``None`` entries are discarded before anything else is done.

    Returns
    -------
    ``None`` when no panels remain, the first panel when there is only one
    (or exactly two that compare equal), otherwise a new Panel whose items
    and minor axis are the sorted unions over all inputs and whose values
    are the inputs' values concatenated along axis 1.

    Raises
    ------
    Exception
        If the reindexed value blocks cannot be concatenated.
    """
    panels = [p for p in panels if p is not None]

    # corner cases
    if len(panels) == 0:
        return None
    if len(panels) == 1:
        return panels[0]
    if len(panels) == 2 and panels[0] == panels[1]:
        return panels[0]

    def joint_index_for_axis(panels, axis):
        """Sorted union of the labels found on `axis` across `panels`."""
        labels = set()
        for p in panels:
            labels.update(getattr(p, axis))
        return sorted(labels)

    def reindex_on_axis(panels, axis, axis_reindex):
        """Reindex every panel onto the joint labels of `axis`.

        `axis` is the attribute name, `axis_reindex` the matching keyword
        accepted by ``reindex``.
        """
        new_axis = joint_index_for_axis(panels, axis)
        new_panels = [p.reindex(**{axis_reindex: new_axis, 'copy': False})
                      for p in panels]
        return new_panels, new_axis

    # Joint major index; the sub-panels are NOT reindexed on it because their
    # rows are appended one after another.
    # NOTE(review): the labels are the *sorted* union while the values below
    # are stacked in the order the panels were passed, so labels and rows only
    # line up when the panels' major axes are disjoint and already supplied
    # in ascending order -- confirm with callers.
    major = joint_index_for_axis(panels, 'major_axis')

    # align the minor axis, then the items, across all panels
    panels, minor = reindex_on_axis(panels, 'minor_axis', 'minor')
    panels, items = reindex_on_axis(panels, 'items', 'items')

    # concatenate values along axis 1
    try:
        values = np.concatenate([p.values for p in panels], axis=1)
    except Exception as detail:
        raise Exception(
            "cannot append values that dont' match dimensions! -> [%s] %s"
            % (','.join(["%s" % p for p in panels]), str(detail)))

    return Panel(values, items=items, major_axis=major, minor_axis=minor)
|
||
|
||
|
||
# Does the job, but inefficiently (it would be better to handle this the way a table is read in PyTables, e.g. create a LongPanel and then convert it to a wide Panel)
|
||
def create_panels_join(cls, panels):
    """Combine several panels into a single Panel over the union of their axes.

    Parameters
    ----------
    cls : unused by this function
    panels : iterable of Panel or None
        ``None`` entries are discarded before anything else is done.

    Returns
    -------
    ``None`` when no panels remain, the first panel when there is only one
    (or exactly two that compare equal), otherwise a new Panel built on the
    sorted unions of items, major and minor labels. Cells that no input
    provides are filled with NaN; when several inputs provide the same
    cell, the panel that comes last in `panels` wins.
    """
    panels = [p for p in panels if p is not None]

    # corner cases
    if len(panels) == 0:
        return None
    if len(panels) == 1:
        return panels[0]
    if len(panels) == 2 and panels[0] == panels[1]:
        return panels[0]

    # (minor label, major label, item label) -> value
    cells = {}
    minor, major, items = set(), set(), set()
    for panel in panels:
        items.update(panel.items)
        major.update(panel.major_axis)
        minor.update(panel.minor_axis)
        values = panel.values

        # Build the label -> position pairs once per panel instead of once
        # per iteration of the enclosing loops.
        item_pos = list(panel.items.indexMap.items())
        minor_pos = list(panel.minor_axis.indexMap.items())
        major_pos = list(panel.major_axis.indexMap.items())

        for item, item_index in item_pos:
            for minor_i, minor_index in minor_pos:
                for major_i, major_index in major_pos:
                    try:
                        cells[(minor_i, major_i, item)] = \
                            values[item_index, major_index, minor_index]
                    except IndexError:
                        # Best effort: skip a label whose mapped position
                        # falls outside panel.values. Only IndexError is
                        # swallowed so that Ctrl-C still stops the loop.
                        continue

    # stack the values
    minor = sorted(minor)
    major = sorted(major)
    items = sorted(items)

    # One (items x major) slab per minor label, stacked along the third
    # axis to give items x major x minor.
    slabs = []
    for minor_i in minor:
        rows = [np.asarray([cells.get((minor_i, major_i, item), np.nan)
                            for item in items])
                for major_i in major]
        slabs.append(np.asarray(rows).transpose())
    data = np.dstack(slabs)

    # construct the panel
    return Panel(data, items, major, minor)
# Register create_panels_join on Panel under the name 'join_many'.
# NOTE(review): add_class_method is defined elsewhere; presumably it attaches
# the function as a class-level method -- confirm.
add_class_method(Panel, create_panels_join, 'join_many')
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import numpy as np | ||
|
||
from pandas import * | ||
import pandas._tseries as lib | ||
|
||
from pandas import DataFrame | ||
import timeit | ||
|
||
# Setup code handed to timeit.Timer. The two %d placeholders are filled with
# the row count and the column count; the code builds an (n, k) random array
# and an int32 indexer that reverses its rows.
setup = """
from pandas import Series
import pandas._tseries as lib
import random
import numpy as np
import random
n = %d
k = %d
arr = np.random.randn(n, k)
indexer = np.arange(n, dtype=np.int32)
indexer = indexer[::-1]
"""

# Row counts to benchmark, paired element-wise with the number of
# repetitions to run at each size.
sizes = [100, 1000, 10000, 100000]
iters = [1000, 1000, 100, 1]

# Mean seconds per call for each strategy, one entry per benchmarked size.
fancy_2d = []
take_2d = []
cython_2d = []

# Default repetition count (same value as the default of _timeit's `iters`).
n = 1000


def _timeit(stmt, size, k=5, iters=1000):
    """Return the mean time in seconds of one execution of `stmt`.

    stmt  : statement to time; it runs after `setup`, so it may use the
            names that code defines (`arr`, `indexer`, `lib`, ...)
    size  : number of rows substituted into the setup code
    k     : number of columns substituted into the setup code
    iters : number of times the statement is executed
    """
    # Use the arguments rather than the module-level loop variables, so the
    # helper measures what the caller asked for.
    timer = timeit.Timer(stmt=stmt, setup=setup % (size, k))
    return timer.timeit(iters) / iters
|
||
# Time each indexing strategy at every size, passing the matching entry of
# `iters` as the repetition count.
for sz, its in zip(sizes, iters):
    print(sz)
    fancy_2d.append(_timeit('arr[indexer]', sz, iters=its))
    take_2d.append(_timeit('arr.take(indexer, axis=0)', sz, iters=its))
    cython_2d.append(_timeit('lib.take_axis0(arr, indexer)', sz, iters=its))

# One row per entry of `sizes`, one column per strategy.
df = DataFrame({'fancy': fancy_2d,
                'take': take_2d,
                'cython': cython_2d})

# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(df)
|
||
# The same row shuffle expressed in R, for comparison with the timings above.
# Imported here rather than at the top of the file, so the Python benchmarks
# above run before this import is attempted.
from pandas.rpy.common import r
r('mat <- matrix(rnorm(50000), nrow=10000, ncol=5)')
# The closing parenthesis was missing, which R rejects as a parse error.
r('set.seed(12345)')
r('indexer <- sample(1:10000)')
r('mat[indexer,]')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
year,indiv,zit,xit | ||
1977,"A",1.2,.6 | ||
1977,"B",1.5,.5 | ||
1977,"C",1.7,.8 | ||
1978,"A",.2,.06 | ||
1978,"B",.7,.2 | ||
1978,"C",.8,.3 | ||
1978,"D",.9,.5 | ||
1978,"E",1.4,.9 | ||
1979,"C",.2,.15 | ||
1979,"D",.14,.05 | ||
1979,"E",.5,.15 | ||
1979,"F",1.2,.5 | ||
1979,"G",3.4,1.9 | ||
1979,"H",5.4,2.7 | ||
1979,"I",6.4,1.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.