-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #21 from mortonjt/regression_results_part1
ENH: Adding in first draft of the RegressionResults object
- Loading branch information
Showing
4 changed files
with
120 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
coveralls | ||
ete3 | ||
statsmodels |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
#!/usr/bin/env python | ||
|
||
# ---------------------------------------------------------------------------- | ||
# Copyright (c) 2016--, gneiss development team. | ||
# | ||
# Distributed under the terms of the GPLv3 License. | ||
# | ||
# The full license is in the file COPYING.txt, distributed with this software. | ||
# ---------------------------------------------------------------------------- | ||
import pandas as pd | ||
|
||
|
||
class RegressionResults():
    """
    Summary object for storing regression results.

    Attributes
    ----------
    feature_names : array_like, str, or None
        Stored as passed to the constructor.
    basis : np.array or None
        Stored as passed to the constructor.
    results : list
        The fitted result objects, stored as passed to the constructor.
    pvalues : pd.DataFrame
        One row per fitted model (labelled with the model's
        `endog_names`), one column per model coefficient.
    r2 : float
        Coefficient of determination pooled over all fitted models.
        `nan` when the pooled total sum of squares is zero (for example
        when no results are given).
    """
    def __init__(self, stat_results,
                 feature_names=None,
                 basis=None):
        """ Reorganizes statsmodels regression modules.

        Accepts a list of statsmodels RegressionResults objects
        and performs some additional summary statistics.

        Parameters
        ----------
        stat_results : list, sm.RegressionResults
            List of RegressionResults objects.  Each element must expose
            `pvalues` (a `pd.Series`), `model.endog_names`, `ssr` and
            `ess`.
        feature_names : array_like, str, optional
            List of original names for features.
        basis : np.array, optional
            Orthonormal basis in the Aitchison simplex.
            If this is not specified, then `project` cannot
            be enabled in `coefficients` or `predict`.
        """
        self.feature_names = feature_names
        self.basis = basis
        self.results = stat_results

        # Naming note: statsmodels calls the residual sum of squares `ssr`
        # and the explained sum of squares `ess`.  Locally they are called
        # `sse` (sum of squares error) and `ssr` (sum of squares regression).
        # See `statsmodels.regression.linear_model.RegressionResults`.
        sse = 0
        ssr = 0

        # Collect one p-value row per model in a single pass over the
        # results.  `rename` returns a new Series, so the Series held by
        # the fitted result object is not relabelled as a side effect.
        rows = []
        for r in self.results:
            rows.append(r.pvalues.rename(r.model.endog_names))
            sse += r.ssr
            ssr += r.ess

        # `DataFrame.append` no longer exists in pandas >= 2.0; build the
        # frame from the collected rows in one step instead.
        self.pvalues = pd.DataFrame(rows)

        # calculate the overall coefficient of determination (i.e. R2)
        sst = sse + ssr
        # Guard against 0 / 0 (e.g. an empty list of results).
        self.r2 = 1 - sse / sst if sst != 0 else float('nan')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
#!/usr/bin/env python | ||
|
||
# ---------------------------------------------------------------------------- | ||
# Copyright (c) 2016--, gneiss development team. | ||
# | ||
# Distributed under the terms of the GPLv3 License. | ||
# | ||
# The full license is in the file COPYING.txt, distributed with this software. | ||
# ---------------------------------------------------------------------------- | ||
import pandas as pd | ||
import pandas.util.testing as pdt | ||
import statsmodels.formula.api as smf | ||
import unittest | ||
from gneiss._summary import RegressionResults | ||
|
||
|
||
class TestRegressionResults(unittest.TestCase):
    """Unit tests for `RegressionResults` built from two small OLS fits."""

    def setUp(self):
        # After the transpose, samples s1..s7 are rows and the responses
        # Y1, Y2 plus the covariate X are columns.
        self.data = pd.DataFrame([[1, 3, 4, 5, 2, 3, 4],
                                  list(range(1, 8)),
                                  [1, 3, 2, 4, 3, 5, 4]],
                                 columns=['s1', 's2', 's3', 's4',
                                          's5', 's6', 's7'],
                                 index=['Y1', 'Y2', 'X']).T
        model1 = smf.ols(formula="Y1 ~ X", data=self.data)
        model2 = smf.ols(formula="Y2 ~ X", data=self.data)
        self.results = [model1.fit(), model2.fit()]

    def test_r2(self):
        # Hard-coded fitted values for the two models, per sample.
        fittedvalues = pd.DataFrame({'s1': [1.986842, 1.236842],
                                     's2': [3.065789, 3.815789],
                                     's3': [2.526316, 2.526316],
                                     's4': [3.605263, 5.105263],
                                     's5': [3.065789, 3.815789],
                                     's6': [4.144737, 6.394737],
                                     's7': [3.605263, 5.105263]},
                                    index=['Y1', 'Y2']).T
        # Only the response columns take part in the sums of squares.
        responses = self.data.iloc[:, :2]
        m = responses.mean(axis=0)
        sse = ((fittedvalues - responses)**2).sum().sum()
        sst = ((m - responses)**2).sum().sum()
        exp_r2 = 1 - (sse / sst)

        res = RegressionResults(self.results)
        self.assertAlmostEqual(exp_r2, res.r2)

    def test_regression_results_pvalues(self):
        # checks to see if pvalues are calculated correctly.
        res = RegressionResults(self.results)
        exp = pd.DataFrame({'Intercept': [0.307081, 0.972395],
                            'X': [0.211391, 0.029677]},
                           index=['Y1', 'Y2'])
        # `check_less_precise` is gone from pandas >= 2.0; an explicit
        # relative tolerance (about 3 significant decimals) replaces
        # `check_less_precise=True`.
        pd.testing.assert_frame_equal(res.pvalues, exp,
                                      check_exact=False,
                                      rtol=0.5e-3)
|
||
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters