Skip to content

Commit

Permalink
Merge pull request #21 from mortonjt/regression_results_part1
Browse files Browse the repository at this point in the history
ENH: Adding in first draft of the RegressionResults object
  • Loading branch information
antgonza authored Jul 28, 2016
2 parents 22cbcc3 + 491165b commit 356896d
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 0 deletions.
1 change: 1 addition & 0 deletions ci/pip_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
coveralls
ete3
statsmodels
59 changes: 59 additions & 0 deletions gneiss/_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env python

# ----------------------------------------------------------------------------
# Copyright (c) 2016--, gneiss development team.
#
# Distributed under the terms of the GPLv3 License.
#
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------
import pandas as pd


class RegressionResults():
    """
    Summary object for storing regression results.

    Attributes set by the constructor
    ---------------------------------
    feature_names : stored as passed in.
    basis : stored as passed in.
    results : the `stat_results` argument, stored as passed in.
    pvalues : pd.DataFrame
        One row per fitted model (labelled by `model.endog_names`),
        one column per model parameter.
    r2 : float
        Coefficient of determination pooled over all fitted models.
    """
    def __init__(self, stat_results,
                 feature_names=None,
                 basis=None):
        """ Reorganizes statsmodels regression results.

        Accepts a list of statsmodels RegressionResults objects
        and computes some additional summary statistics.

        Parameters
        ----------
        stat_results : list, sm.RegressionResults
            List of RegressionResults objects.  Each element must expose
            `pvalues`, `model.endog_names`, `ssr` and `ess`.
        feature_names : array_like, str, optional
            List of original names for features.
        basis : np.array, optional
            Orthonormal basis in the Aitchison simplex.
            This constructor only stores it; per the original design
            notes it is required for projecting results back onto the
            simplex.
        """
        self.feature_names = feature_names
        self.basis = basis
        self.results = stat_results

        # Sum of squares error, also referred to as the sum of squared
        # residuals.  statsmodels exposes this quantity as `ssr`.
        sse = 0
        # Sum of squares regression, also referred to as the explained
        # sum of squares.  statsmodels exposes this quantity as `ess`.
        ssr = 0
        # See `statsmodels.regression.linear_model.RegressionResults`
        # for more explanation on `ess` and `ssr`.

        # Collect the p-values in a list and build the frame once:
        # `DataFrame.append` re-copied the frame on every iteration and
        # no longer exists in pandas >= 2.0.
        pvalue_rows = []
        for r in self.results:
            # Wrap in a new Series instead of assigning `r.pvalues.name`,
            # so the caller's result object is left untouched.
            pvalue_rows.append(pd.Series(r.pvalues,
                                         name=r.model.endog_names))
            sse += r.ssr
            ssr += r.ess
        self.pvalues = pd.DataFrame(pvalue_rows)

        # calculate the overall coefficient of determination (i.e. R2)
        sst = sse + ssr
        # R2 is undefined when there is no variation to explain (for
        # example an empty list of results); report NaN rather than
        # raising ZeroDivisionError.
        self.r2 = 1 - sse / sst if sst != 0 else float('nan')
59 changes: 59 additions & 0 deletions gneiss/tests/test_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env python

# ----------------------------------------------------------------------------
# Copyright (c) 2016--, gneiss development team.
#
# Distributed under the terms of the GPLv3 License.
#
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------
import pandas as pd
import pandas.util.testing as pdt
import statsmodels.formula.api as smf
import unittest
from gneiss._summary import RegressionResults


class TestRegressionResults(unittest.TestCase):
    """Exercises `RegressionResults` on two OLS fits sharing one predictor."""

    def setUp(self):
        # Seven samples (rows): two responses, Y1 and Y2, and a single
        # predictor X.  Column order is pinned explicitly because the
        # tests below slice the responses positionally.
        sample_ids = ['s1', 's2', 's3', 's4', 's5', 's6', 's7']
        self.data = pd.DataFrame({'Y1': [1, 3, 4, 5, 2, 3, 4],
                                  'Y2': list(range(1, 8)),
                                  'X': [1, 3, 2, 4, 3, 5, 4]},
                                 index=sample_ids,
                                 columns=['Y1', 'Y2', 'X'])
        # One ordinary least squares fit per response variable.
        self.results = [smf.ols(formula=formula, data=self.data).fit()
                        for formula in ("Y1 ~ X", "Y2 ~ X")]

    def test_r2(self):
        # The overall R2 must equal 1 - SSE / SST pooled over both models.
        sample_ids = ['s1', 's2', 's3', 's4', 's5', 's6', 's7']
        # Hard-coded fitted values of the two regressions in `setUp`.
        predicted = pd.DataFrame(
            {'Y1': [1.986842, 3.065789, 2.526316, 3.605263,
                    3.065789, 4.144737, 3.605263],
             'Y2': [1.236842, 3.815789, 2.526316, 5.105263,
                    3.815789, 6.394737, 5.105263]},
            index=sample_ids,
            columns=['Y1', 'Y2'])
        responses = self.data.iloc[:, :2]
        # The means also cover X; after alignment against the two
        # response columns that entry is all-NaN and is skipped by the
        # NaN-ignoring sums below.
        column_means = self.data.mean(axis=0)

        residual_ss = ((predicted - responses)**2).sum().sum()
        total_ss = ((column_means - responses)**2).sum().sum()
        expected_r2 = 1 - (residual_ss / total_ss)

        summary = RegressionResults(self.results)
        self.assertAlmostEqual(expected_r2, summary.r2)

    def test_regression_results_pvalues(self):
        # The p-values table has one row per response and one column
        # per model parameter.
        expected = pd.DataFrame({'Intercept': [0.307081, 0.972395],
                                 'X': [0.211391, 0.029677]},
                                index=['Y1', 'Y2'])
        summary = RegressionResults(self.results)
        pdt.assert_frame_equal(summary.pvalues, expected,
                               check_exact=False,
                               check_less_precise=True)


# Run the suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def finalize_options(self):
'scipy >= 0.15.1',
'nose >= 1.3.7',
'scikit-bio>=0.4.2',
'statsmodels',
'ete3',
],
classifiers=classifiers,
Expand Down

0 comments on commit 356896d

Please sign in to comment.