From c2d00afcdf794932e99a09babdcce64de45a1f95 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sun, 9 Jun 2013 22:44:10 -0700 Subject: [PATCH 001/343] Fixed the bias in nbd_lt --- distributions.py | 178 +++++++++++++++++++++++++++++++++--------- test_distributions.py | 67 +++++++--------- 2 files changed, 169 insertions(+), 76 deletions(-) diff --git a/distributions.py b/distributions.py index 7b61fec..a9f1267 100644 --- a/distributions.py +++ b/distributions.py @@ -104,7 +104,7 @@ import numpy as np import scipy.stats as stats import scipy.optimize -import scipy.special +import scipy.special as spec from copy import deepcopy import math as m import scipy.integrate as integrate @@ -2136,7 +2136,9 @@ def nll_nb(k): return self -class nbd_lt(nbd): + +class nbd_lt(Distribution): + __doc__ = Distribution.__doc__ + \ ''' Description ----------- @@ -2153,34 +2155,37 @@ class nbd_lt(nbd): self.var keywords ----------------- - Parameterization differs for different forms of the nbd. We use the - standard ecological form as described by Ben Bolker. Parameters 'a' (1 / - n_samp), 'tot_obs', and k are used to derive the nbd parameter p (see code - for details). Parameters k and p are used to generate distribution. k is - included in self.var if it is calculated in fit. - - p : array of floats - p parameters of nbd + mu : array of floats + mu parameters of nbd + bias_mu : array of float + mu used to correct for bias in mean k : array of floats Aggregation parameter + k is included in self.var if it is calculated in fit. Notes ----- The total species (S) is equivalent to n_samp and the total individuals (N) is equivalent to tot_obs. + Parameterization based on Sampford 1955 + + There is a bias in the mean when k is small. The mean tends to be larger + than expected. This method used brute force to correct for the bias so + that the mean of the distribution + ''' + @doc_inherit def __init__(self, **kwargs): self.params = kwargs self.min_supp = 1 - self.par_num = 2 + self.par_num = 2 self.var = {} - - - def pmf(self, n): - ''' + + def pmf(self, n, fix_bias=True, vals=1e3): + """ Probability mass function method. Parameters @@ -2188,6 +2193,14 @@ def pmf(self, n): n : int, float or array-like object Values at which to calculate pmf. May be a list of same length as parameters, or single iterable. + fix_bias : bool + If True, fixes the bias in the truncated negative binomial such + that the mean of the distribution is equal to tot_obs / n_samp. + The bias increases as k -> 0. + vals : float + Creates a vector np.arange(1, vals + 1) to correct the bias. A + higher vals will mean a more precise correction but slower run + time. Returns ------- @@ -2195,24 +2208,57 @@ def pmf(self, n): List of 1D arrays of probability of observing sample n. See class docstring for more specific information on this distribution. - ''' + + """ # Get parameters - n_samp, tot_obs, k = self.get_params(['n_samp', 'tot_obs', 'k']) + n_samp, tot_obs, k =\ + self.get_params(['n_samp', 'tot_obs', 'k']) n = expand_n(n, len(n_samp)) - - # TODO: Additional checks? 
- reg_nbd = nbd(n_samp=n_samp, tot_obs=tot_obs, k=k) - reg_pmf = reg_nbd.pmf(n) - self.var = reg_nbd.var - reg_pmf0 = reg_nbd.pmf(0) + assert np.all(n_samp <= tot_obs), 'n_samp must be <= tot_obs' - trunc_pmf = [(pr / (1 - p0)) for pr, p0 in zip(reg_pmf, reg_pmf0)] + # Calculate pmf + def pmf_eq(n, m, k): + om = (1 / (1 + (m/k))); eta = 1 - om - return trunc_pmf + norm = np.exp(spec.gammaln(k + n) - ((spec.gammaln(k) + + spec.gammaln(n + 1)))) - def cdf(self, n): + kernel = (om**k / (1 - om**k)) * (eta**n) + return norm * kernel + + mu = tot_obs / n_samp + self.var['mu'] = mu + self.var['bias_mu'] = [] + + pmf = [] + nums = np.arange(1, vals + 1) + bias_eq = lambda m, ks: sum(nums * pmf_eq(nums, m, ks)) - tmu + + for tn_samp, ttot_obs, tmu, tk, tn in zip(n_samp, tot_obs, + mu, k, n): + # Fix bias + if fix_bias: + try: + tmu = scipy.optimize.brentq(bias_eq, 1, tmu, args=(tk,)) + self.var['bias_mu'].append(tmu) + except(ValueError): + try: + tmu = scipy.optimize.brentq(bias_eq, 1e-10, tmu, + args=(tk,)) + self.var['bias_mu'].append(tmu) + except(ValueError): + self.var['bias_mu'].append(np.nan) + + tpmf = pmf_eq(tn, tmu, tk) + + pmf.append(tpmf) + + self.var['bias_mu'] = np.array(self.var['bias_mu']) + return pmf + + def cdf(self, n, fix_bias=True, vals=1e3): ''' Cumulative distribution method. @@ -2221,28 +2267,86 @@ def cdf(self, n): n : int, float or array-like object Values at which to calculate cdf. May be a list of same length as parameters, or single iterable. + fix_bias : bool + If True, fixes the bias in the truncated negative binomial such + that the mean of the distribution is equal to tot_obs / n_samp. + The bias increases as k -> 0. + vals : float + Creates a vector np.arange(1, vals + 1) to correct the bias. A + higher vals will mean a more precise correction but slower run + time. Returns ------- cdf : list of ndarrays - List of 1D arrays of probability of observing sample n. + List of 1D arrays of cumulative probability of observing sample n. See class docstring for more specific information on this distribution. ''' + + for kw in self.params.iterkeys(): + if not np.iterable(self.params[kw]): + self.params[kw] = make_array(self.params[kw]) - n_samp, tot_obs, k = self.get_params(['n_samp', 'tot_obs', 'k']) - n = expand_n(n, len(n_samp)) + # Expand n argument if needed, assumes all params same length + n = expand_n(n, len(self.params.values()[0])) + + # Calculate pmfs + max_n = [np.max(tn) for tn in n] + n_in = [np.arange(self.min_supp, i + 1) for i in max_n] + + pmf_list = self.pmf(n_in, fix_bias=fix_bias, vals=vals) + + # Calculate cdfs + cdf = [] + for tpmf, tn in zip(pmf_list, n): + full_cdf = np.cumsum(tpmf) + tcdf = np.array([full_cdf[x - self.min_supp] for x in tn]) + cdf.append(tcdf) + + return cdf + + def fit(self, data, guess_for_k=1): + ''' + Fit method. + + Uses input data to get best fit parameters for distribution, and stores + these parameters in params attribute. - # TODO: Additional checks? + Parameters + ---------- + data : list of ndarrays + Data to use to fit parameters of distribution. Even if only one + data array, must be in a list with one element. + guess_for_k : float + Initial guess for parameter k in solver - reg_nbd = nbd(n_samp=n_samp, tot_obs=tot_obs, k=k) - p0 = reg_nbd.pmf(0) - self.var = reg_nbd.var - reg_cdf = reg_nbd.cdf(n) + See class docstring for more specific information on this distribution. 
+ ''' - trun_cdf = [(tcdf - tp0) / (1 - tp0) for tcdf, tp0 in zip(reg_cdf, p0)] + super(nbd_lt, self).fit(data) + n_samp, tot_obs = self.get_params(['n_samp', 'tot_obs']) - return trun_cdf + data = check_list_of_iterables(data) + tempk = [] + + for tdata, tn_samp, ttot_obs in zip(data, n_samp, tot_obs): + + def nll_nb(k): + self.params['tot_obs'] = ttot_obs + self.params['n_samp'] = tn_samp + self.params['k'] = k + return -sum(np.log(self.pmf(tdata, fix_bias=False)[0])) + + mlek = scipy.optimize.fmin(nll_nb, np.array([guess_for_k]), + disp=0)[0] + tempk.append(mlek) + self.params['k'] = np.array(tempk) + self.params['n_samp'] = n_samp + self.params['tot_obs'] = tot_obs + self.var['k'] = np.array(tempk) + + return self class fnbd(Distribution): __doc__ = Distribution.__doc__ + \ @@ -3952,7 +4056,7 @@ def _ln_choose(n, k): Log binomial coefficient with extended gamma factorials. n and k may be int or array - if both array, must be the same length. ''' - gammaln = scipy.special.gammaln + gammaln = spec.gammaln return gammaln(n + 1) - (gammaln(k + 1) + gammaln(n - k + 1)) def set_up_and_down(anch, a_list, base=2): diff --git a/test_distributions.py b/test_distributions.py index 99ed837..b5999a2 100644 --- a/test_distributions.py +++ b/test_distributions.py @@ -427,14 +427,11 @@ def test_nbd_lt(self): # Test that cdf is about one dist = nbd_lt(tot_obs=2300, n_samp=45, k=3) - self.assertTrue(np.round(dist.cdf(2300)[0][0], decimals=1) == 1.0) + d = dist.cdf(2300)[0][0] + print dist.var + print dist.params + self.assertTrue(np.round(d, decimals=1) == 1.0) - # Check that k of length one is extended to length 2 based on p - # parameter - dist = nbd_lt(tot_obs=[400, 600], n_samp=[30, 23], k=[3]) - pmf = dist.pmf(1) - self.assertTrue(np.array_equal(np.round(dist.var['p'], decimals=4), - np.array([.1837,.1031]))) # Multiple entries both yield cdf with 1 dist = nbd_lt(tot_obs=[400, 600], n_samp=[30, 23], k=[3,2]) @@ -444,38 +441,30 @@ def test_nbd_lt(self): self.assertTrue(a == b) # Test pmf against scipy - mu = 500 * (1. / 20); k = 2; p = 1. / (mu / k + 1) - scipy_0 = stats.nbinom.pmf(0, k, p) - vals = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17]) - test_vals = stats.nbinom.pmf(vals, k, p) / (1 - scipy_0) - pred_vals = nbd_lt(tot_obs=500, n_samp=20, k=2).pmf(vals)[0] - self.assertTrue(np.array_equal(test_vals, pred_vals)) - - # Test pmf against Published Truncated NBD. Sampford 1955, The - # Truncated Negative Binomial Distribution. - def test_pmf(n, p, k): - om = (1 / (1 + (mu/k))); eta = 1 - om - - norm = np.math.gamma(k + n) / (np.math.gamma(k) * - np.math.gamma(n + 1)) - - kernel = (om**k / (1 - om**k)) * (eta**n) - return norm * kernel - - test_vals = np.array([test_pmf(x, p, k) for x in vals]) - test_vals = np.round(test_vals, decimals=7) - pred_vals = np.round(pred_vals, decimals=7) - self.assertTrue(np.array_equal(test_vals, pred_vals)) - - # Test cdf against Published TNBD: - pred_cdf = nbd_lt(tot_obs=500, n_samp=20, k=2).cdf(vals)[0] - pred_cdf = np.round(pred_cdf, decimals=7) - test_vals = np.array([test_pmf(x, p, k) for x in vals]) - test_cdf = np.round(np.cumsum(test_vals), decimals=7) - self.assertTrue(np.array_equal(pred_cdf, test_cdf)) - - - + ks = np.linspace(0.01, 5, 100) + for k in ks: + mu = 500 * (1. / 20); p = 1. 
/ (mu / k + 1) + scipy_0 = stats.nbinom.pmf(0, k, p) + vals = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17]) + test_vals = stats.nbinom.pmf(vals, k, p) / (1 - scipy_0) + pred_vals = nbd_lt(tot_obs=500, n_samp=20, k=k).pmf(vals, + fix_bias=False)[0] + if not np.array_equal(np.round(test_vals, decimals=3), + np.round(pred_vals, decimals=3)): + print pred_vals + print test_vals + self.assertTrue(np.array_equal(np.round(test_vals, decimals=3), + np.round(pred_vals, decimals=3))) + + + # Test that fixing the bias leads to the proper mean + ks = np.linspace(.01, 5, num=100) + vals = np.arange(1,1000) + for k in ks: + ob = nbd_lt(tot_obs=500, n_samp=20, k=k) + pred_vals = ob.pmf(vals)[0] + bmean = sum(vals * pred_vals) + self.assertTrue(np.round(bmean, decimals=1) == 500 / 20.) def test_fnbd(self): From a0cd12298a4c77eb2c2b7e3490e1d9e696b7269f Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 11 Jun 2013 12:44:08 -0700 Subject: [PATCH 002/343] make_array can take in dtype --- distributions.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/distributions.py b/distributions.py index a9f1267..e3fb191 100644 --- a/distributions.py +++ b/distributions.py @@ -3909,12 +3909,20 @@ def beta_solver(x, k, tot_obs, n_samp): return sum(x ** k / float(tot_obs) * n_samp) - sum((x ** k) / k) -def make_array(n): - '''Cast n as iterable array.''' +def make_array(n, dtype=None): + '''Cast n as iterable array. If dtype not none this will be the dtype of + the array. Otherwise it lets python choose. Must be a valid dtype or an + error will be thrown''' if np.iterable(n): - return np.array(n) + if dtype==None: + return np.array(n) + else: + return np.array(n, dtype=dtype) else: - return np.array([n]) + if dtype==None: + return np.array([n]) + else: + return np.array([n], dtype=dtype) def expand_n(n, size): From 30a0e3012c2061988284376fa0cc6715c5149eb9 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 11 Jun 2013 19:51:39 -0700 Subject: [PATCH 003/343] Deleted pandas import --- data.py | 1 - 1 file changed, 1 deletion(-) diff --git a/data.py b/data.py index a94229e..f0f6bd9 100644 --- a/data.py +++ b/data.py @@ -16,7 +16,6 @@ import xml.etree.ElementTree as etree from matplotlib.mlab import csv2rec import sqlite3 as lite -import pandas as pd class DataTable: From 0b8a60c2be74b4ab51ae3546d953180a9518fdee Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Mon, 5 Aug 2013 07:43:38 -0700 Subject: [PATCH 004/343] Added calculation of sum of squares and mean squared error --- compare.py | 10 ++++++++-- distributions.py | 1 - 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/compare.py b/compare.py index d4bd508..8f62980 100644 --- a/compare.py +++ b/compare.py @@ -1470,7 +1470,7 @@ def calc_ci(stat1, stat2): return res -def mean_squared_error(obs, pred): +def mean_squared_error(obs, pred, divide_by_n=True): ''' Calculates the mean squared error between observed and predicted data sets. The data sets must be of the same length @@ -1481,6 +1481,9 @@ def mean_squared_error(obs, pred): The observed data pred : array-like object The predicted data + divide_by_n : bool + If True, returns mean squared error. If False returns sum of squares + error. 
Returns ------- @@ -1493,7 +1496,10 @@ def mean_squared_error(obs, pred): obs, pred = cnvrt_to_arrays(obs, pred) - return sum((pred - obs)**2) / len(obs) + if divide_by_n: + return sum((pred - obs)**2) / len(obs) + else: + return sum((pred - obs)**2) def cnvrt_to_arrays(*args): diff --git a/distributions.py b/distributions.py index e3fb191..ef83772 100644 --- a/distributions.py +++ b/distributions.py @@ -512,7 +512,6 @@ def rad(self): return rad - def fit(self, data): ''' Fit method. From a4a225220f66b1c2fbe8b6ae4e84848050000eae Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Mon, 5 Aug 2013 08:18:09 -0700 Subject: [PATCH 005/343] Default fix_bias in nbd_lt is now False --- distributions.py | 4 ++-- test_compare.py | 2 ++ test_distributions.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/distributions.py b/distributions.py index ef83772..11ffaa8 100644 --- a/distributions.py +++ b/distributions.py @@ -2183,7 +2183,7 @@ def __init__(self, **kwargs): self.par_num = 2 self.var = {} - def pmf(self, n, fix_bias=True, vals=1e3): + def pmf(self, n, fix_bias=False, vals=1e3): """ Probability mass function method. @@ -2257,7 +2257,7 @@ def pmf_eq(n, m, k): self.var['bias_mu'] = np.array(self.var['bias_mu']) return pmf - def cdf(self, n, fix_bias=True, vals=1e3): + def cdf(self, n, fix_bias=False, vals=1e3): ''' Cumulative distribution method. diff --git a/test_compare.py b/test_compare.py index be09aaa..507a6c5 100644 --- a/test_compare.py +++ b/test_compare.py @@ -461,6 +461,8 @@ def test_compare_aic(self): sad_c = CompareSAD(self.sad_data, ['logser', 'most_even', 'nbd_lt']) aic_out = sad_c.compare_aic(crt=True) + print aic_out + # Most even should have the lowest AIC value for the second dataset self.assertTrue(aic_out[1][1] == np.min(aic_out[1])) diff --git a/test_distributions.py b/test_distributions.py index b5999a2..3e86799 100644 --- a/test_distributions.py +++ b/test_distributions.py @@ -462,7 +462,7 @@ def test_nbd_lt(self): vals = np.arange(1,1000) for k in ks: ob = nbd_lt(tot_obs=500, n_samp=20, k=k) - pred_vals = ob.pmf(vals)[0] + pred_vals = ob.pmf(vals, fix_bias=True)[0] bmean = sum(vals * pred_vals) self.assertTrue(np.round(bmean, decimals=1) == 500 / 20.) From 6217412a44266c0b0e52ac5ddc616d1ab03952b0 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 15 Aug 2013 13:18:08 -0700 Subject: [PATCH 006/343] Added examples of using SAR --- distributions.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/distributions.py b/distributions.py index 11ffaa8..fa929f9 100644 --- a/distributions.py +++ b/distributions.py @@ -2170,8 +2170,8 @@ class nbd_lt(Distribution): Parameterization based on Sampford 1955 There is a bias in the mean when k is small. The mean tends to be larger - than expected. This method used brute force to correct for the bias so - that the mean of the distribution + than expected. This method uses brute force to correct for the bias so + that the mean of the distribution is correct for small k ''' @@ -2908,7 +2908,25 @@ class gen_sar(Curve): plognorm and plognorm_lt are not supported by gen_sar. If one would like them to be supported, the full pmf for the sad must be calculated in the fit method. - + + Examples + -------- + import distributions as dist + + # Make an SAR with a Logseries SAD and Truncated Geometric SSAD. 
The + # community has 500 individuals and 14 species + + sar1 = dist.gen_sar(dist.logser(), dist.tgeo(), tot_obs=500, n_samp=14) + + # Number of species in half the base area and double the base area + sar1.vals([.5, 2]) + + # Make an SAR with Logseries and Truncated NBD + sar2 = dist.gen_sar(dist.logser(), dist.tnbd(k=.2), tot_obs=500, n_samp=14) + + # Iterated the SAR 2 doublings from the base scale + sar2.iter_vals(upscale=2) + ''' From 6a11cdd7acce7d9897949fbda610014c05f218c0 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 15 Aug 2013 20:02:38 -0700 Subject: [PATCH 007/343] Default behavior of nbd_lt is to adjust mu --- distributions.py | 65 +++++++++++++++++++------------------------ test_distributions.py | 31 +++++++++++---------- 2 files changed, 45 insertions(+), 51 deletions(-) diff --git a/distributions.py b/distributions.py index fa929f9..f4d03e5 100644 --- a/distributions.py +++ b/distributions.py @@ -2155,9 +2155,7 @@ class nbd_lt(Distribution): self.var keywords ----------------- mu : array of floats - mu parameters of nbd - bias_mu : array of float - mu used to correct for bias in mean + mu parameters of nbd_lt k : array of floats Aggregation parameter k is included in self.var if it is calculated in fit. @@ -2169,9 +2167,9 @@ class nbd_lt(Distribution): Parameterization based on Sampford 1955 - There is a bias in the mean when k is small. The mean tends to be larger + The mean tends to be larger than expected. This method uses brute force to correct for the bias so - that the mean of the distribution is correct for small k + that the mean of the distribution is correct for small k. ''' @@ -2192,10 +2190,6 @@ def pmf(self, n, fix_bias=False, vals=1e3): n : int, float or array-like object Values at which to calculate pmf. May be a list of same length as parameters, or single iterable. - fix_bias : bool - If True, fixes the bias in the truncated negative binomial such - that the mean of the distribution is equal to tot_obs / n_samp. - The bias increases as k -> 0. vals : float Creates a vector np.arange(1, vals + 1) to correct the bias. A higher vals will mean a more precise correction but slower run @@ -2208,6 +2202,7 @@ def pmf(self, n, fix_bias=False, vals=1e3): See class docstring for more specific information on this distribution. 
+ """ # Get parameters @@ -2227,37 +2222,36 @@ def pmf_eq(n, m, k): kernel = (om**k / (1 - om**k)) * (eta**n) return norm * kernel - mu = tot_obs / n_samp - self.var['mu'] = mu - self.var['bias_mu'] = [] + nt_mu = tot_obs / n_samp # Non_truncated mu + self.var['mu'] = [] pmf = [] nums = np.arange(1, vals + 1) - bias_eq = lambda m, ks: sum(nums * pmf_eq(nums, m, ks)) - tmu + bias_eq = lambda m, ks, temp_mu: sum(nums * pmf_eq(nums, m, ks)) -\ + temp_mu - for tn_samp, ttot_obs, tmu, tk, tn in zip(n_samp, tot_obs, - mu, k, n): - # Fix bias - if fix_bias: + for tn_samp, ttot_obs, tnt_mu, tk, tn in zip(n_samp, tot_obs, + nt_mu, k, n): + # Find tmu + try: + tmu = scipy.optimize.brentq(bias_eq, 1, tnt_mu, + args=(tk, tnt_mu)) + except(ValueError): try: - tmu = scipy.optimize.brentq(bias_eq, 1, tmu, args=(tk,)) - self.var['bias_mu'].append(tmu) - except(ValueError): - try: - tmu = scipy.optimize.brentq(bias_eq, 1e-10, tmu, - args=(tk,)) - self.var['bias_mu'].append(tmu) - except(ValueError): - self.var['bias_mu'].append(np.nan) + tmu = scipy.optimize.brentq(bias_eq, 1e-10, tnt_mu, + args=(tk, tnt_mu)) + except(ValueError): # Set to nan if all else fails + tmu = np.nan + self.var['mu'].append(tmu) tpmf = pmf_eq(tn, tmu, tk) pmf.append(tpmf) - self.var['bias_mu'] = np.array(self.var['bias_mu']) + self.var['mu'] = np.array(self.var['mu']) return pmf - def cdf(self, n, fix_bias=False, vals=1e3): + def cdf(self, n, vals=1e3): ''' Cumulative distribution method. @@ -2266,12 +2260,8 @@ def cdf(self, n, fix_bias=False, vals=1e3): n : int, float or array-like object Values at which to calculate cdf. May be a list of same length as parameters, or single iterable. - fix_bias : bool - If True, fixes the bias in the truncated negative binomial such - that the mean of the distribution is equal to tot_obs / n_samp. - The bias increases as k -> 0. vals : float - Creates a vector np.arange(1, vals + 1) to correct the bias. A + Creates a vector np.arange(1, vals + 1) to calculate mu. A higher vals will mean a more precise correction but slower run time. @@ -2294,7 +2284,7 @@ def cdf(self, n, fix_bias=False, vals=1e3): max_n = [np.max(tn) for tn in n] n_in = [np.arange(self.min_supp, i + 1) for i in max_n] - pmf_list = self.pmf(n_in, fix_bias=fix_bias, vals=vals) + pmf_list = self.pmf(n_in, vals=vals) # Calculate cdfs cdf = [] @@ -2305,7 +2295,7 @@ def cdf(self, n, fix_bias=False, vals=1e3): return cdf - def fit(self, data, guess_for_k=1): + def fit(self, data, guess_for_k=1, vals=1e3): ''' Fit method. @@ -2319,6 +2309,9 @@ def fit(self, data, guess_for_k=1): data array, must be in a list with one element. guess_for_k : float Initial guess for parameter k in solver + vals : float + Creates a vector np.arange(1, vals + 1) to estimate mu. A + higher vals will mean a more precise mu, but a slower run time. See class docstring for more specific information on this distribution. ''' @@ -2335,7 +2328,7 @@ def nll_nb(k): self.params['tot_obs'] = ttot_obs self.params['n_samp'] = tn_samp self.params['k'] = k - return -sum(np.log(self.pmf(tdata, fix_bias=False)[0])) + return -sum(np.log(self.pmf(tdata, vals=vals)[0])) mlek = scipy.optimize.fmin(nll_nb, np.array([guess_for_k]), disp=0)[0] diff --git a/test_distributions.py b/test_distributions.py index 3e86799..89280cd 100644 --- a/test_distributions.py +++ b/test_distributions.py @@ -441,20 +441,21 @@ def test_nbd_lt(self): self.assertTrue(a == b) # Test pmf against scipy - ks = np.linspace(0.01, 5, 100) - for k in ks: - mu = 500 * (1. / 20); p = 1. 
/ (mu / k + 1) - scipy_0 = stats.nbinom.pmf(0, k, p) - vals = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17]) - test_vals = stats.nbinom.pmf(vals, k, p) / (1 - scipy_0) - pred_vals = nbd_lt(tot_obs=500, n_samp=20, k=k).pmf(vals, - fix_bias=False)[0] - if not np.array_equal(np.round(test_vals, decimals=3), - np.round(pred_vals, decimals=3)): - print pred_vals - print test_vals - self.assertTrue(np.array_equal(np.round(test_vals, decimals=3), - np.round(pred_vals, decimals=3))) + # This unit test passes when we are not fixing the bias. +# ks = np.linspace(0.01, 5, 100) +# for k in ks: +# mu = 500 * (1. / 20); p = 1. / (mu / k + 1) +# scipy_0 = stats.nbinom.pmf(0, k, p) +# vals = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17]) +# test_vals = stats.nbinom.pmf(vals, k, p) / (1 - scipy_0) +# pred_vals = nbd_lt(tot_obs=500, n_samp=20, k=k).pmf(vals, +# fix_bias=False)[0] +# if not np.array_equal(np.round(test_vals, decimals=3), +# np.round(pred_vals, decimals=3)): +# print pred_vals +# print test_vals +# self.assertTrue(np.array_equal(np.round(test_vals, decimals=3), +# np.round(pred_vals, decimals=3))) # Test that fixing the bias leads to the proper mean @@ -462,7 +463,7 @@ def test_nbd_lt(self): vals = np.arange(1,1000) for k in ks: ob = nbd_lt(tot_obs=500, n_samp=20, k=k) - pred_vals = ob.pmf(vals, fix_bias=True)[0] + pred_vals = ob.pmf(vals)[0] bmean = sum(vals * pred_vals) self.assertTrue(np.round(bmean, decimals=1) == 500 / 20.) From 53551cde013aacd7dc56ef40fe1e46c65e10b19e Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Mon, 9 Sep 2013 17:25:33 -0700 Subject: [PATCH 008/343] Adjusted package call in output --- output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/output.py b/output.py index 06d7ed7..c5f631d 100644 --- a/output.py +++ b/output.py @@ -8,7 +8,7 @@ import matplotlib.pyplot as plt import numpy as np import logging -from macroeco.utils.form_func import output_form, add_field +from utils.form_func import output_form, add_field import copy as cp import os import shutil From 3756fe2180ce0eaa18b6e1272fe8a68c4a2b705f Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 11 Sep 2013 14:47:01 -0700 Subject: [PATCH 009/343] Used the notation in He and Legendre to update nbd_lt --- distributions.py | 60 ++++++++++++++++++++++++------------------- test_distributions.py | 35 ++++++++++--------------- 2 files changed, 48 insertions(+), 47 deletions(-) diff --git a/distributions.py b/distributions.py index f4d03e5..fd31a47 100644 --- a/distributions.py +++ b/distributions.py @@ -1737,8 +1737,8 @@ def pmf(self, n): pmf = [] for tn_samp, ttot_obs, tn in zip(n_samp, tot_obs, n): ttot_obs = np.round(ttot_obs, decimals=0) - #sumg = sum(eq(np.arange(1, np.floor(ttot_obs) + 1), tn_samp, ttot_obs)) - tpmf = eq(tn, tn_samp, ttot_obs)# / sumg # Normalizing + sumg = sum(eq(np.arange(1, np.floor(ttot_obs) + 1), tn_samp, ttot_obs)) + tpmf = eq(tn, tn_samp, ttot_obs) / sumg # Normalizing pmf.append(tpmf) return pmf @@ -2213,42 +2213,50 @@ def pmf(self, n, fix_bias=False, vals=1e3): assert np.all(n_samp <= tot_obs), 'n_samp must be <= tot_obs' # Calculate pmf - def pmf_eq(n, m, k): - om = (1 / (1 + (m/k))); eta = 1 - om + def pmf_eq(n, p, k): + #om = (1 / (1 + (p))); eta = 1 - om norm = np.exp(spec.gammaln(k + n) - ((spec.gammaln(k) + spec.gammaln(n + 1)))) - - kernel = (om**k / (1 - om**k)) * (eta**n) + + kernel = (p / (1 + p))**n * (1 / ((1 + p)**k - 1)) + #kernel = (om**k / (1 - om**k)) * (eta**n) return norm * kernel - nt_mu = tot_obs / n_samp # Non_truncated mu - 
self.var['mu'] = [] + #nt_mu = tot_obs / n_samp # Non_truncated mu + self.var['p'] = [] pmf = [] - nums = np.arange(1, vals + 1) - bias_eq = lambda m, ks, temp_mu: sum(nums * pmf_eq(nums, m, ks)) -\ - temp_mu + #nums = np.arange(1, vals + 1) + p_eq = lambda p, k, N, S : (k * p) / (1 - (1 + p)**-k) -\ + (float(N) / S) + #bias_eq = lambda m, ks, temp_mu: sum(nums * pmf_eq(nums, m, ks)) -\ + # temp_mu - for tn_samp, ttot_obs, tnt_mu, tk, tn in zip(n_samp, tot_obs, - nt_mu, k, n): - # Find tmu - try: - tmu = scipy.optimize.brentq(bias_eq, 1, tnt_mu, - args=(tk, tnt_mu)) - except(ValueError): - try: - tmu = scipy.optimize.brentq(bias_eq, 1e-10, tnt_mu, - args=(tk, tnt_mu)) - except(ValueError): # Set to nan if all else fails - tmu = np.nan + for tn_samp, ttot_obs, tk, tn in zip(n_samp, tot_obs, k, n): + # Find p + + do_it = True + count = 0 + while do_it and count < 20: - self.var['mu'].append(tmu) - tpmf = pmf_eq(tn, tmu, tk) + stop = 10**(count + 1) + count += 1 + + try: + tp = scipy.optimize.brentq(p_eq, 1e-10, stop, args=(tk, + ttot_obs, tn_samp)) + do_it = False + except(ValueError): + if count >= 20: + tp = np.nan + + self.var['p'].append(tp) + tpmf = pmf_eq(tn, tp, tk) pmf.append(tpmf) - self.var['mu'] = np.array(self.var['mu']) + self.var['p'] = np.array(self.var['p']) return pmf def cdf(self, n, vals=1e3): diff --git a/test_distributions.py b/test_distributions.py index 89280cd..3120f73 100644 --- a/test_distributions.py +++ b/test_distributions.py @@ -428,11 +428,8 @@ def test_nbd_lt(self): # Test that cdf is about one dist = nbd_lt(tot_obs=2300, n_samp=45, k=3) d = dist.cdf(2300)[0][0] - print dist.var - print dist.params self.assertTrue(np.round(d, decimals=1) == 1.0) - # Multiple entries both yield cdf with 1 dist = nbd_lt(tot_obs=[400, 600], n_samp=[30, 23], k=[3,2]) cdf = dist.cdf([[400], [600]]) @@ -440,23 +437,19 @@ def test_nbd_lt(self): b = np.round(cdf[0][0], decimals=1) self.assertTrue(a == b) - # Test pmf against scipy - # This unit test passes when we are not fixing the bias. -# ks = np.linspace(0.01, 5, 100) -# for k in ks: -# mu = 500 * (1. / 20); p = 1. / (mu / k + 1) -# scipy_0 = stats.nbinom.pmf(0, k, p) -# vals = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17]) -# test_vals = stats.nbinom.pmf(vals, k, p) / (1 - scipy_0) -# pred_vals = nbd_lt(tot_obs=500, n_samp=20, k=k).pmf(vals, -# fix_bias=False)[0] -# if not np.array_equal(np.round(test_vals, decimals=3), -# np.round(pred_vals, decimals=3)): -# print pred_vals -# print test_vals -# self.assertTrue(np.array_equal(np.round(test_vals, decimals=3), -# np.round(pred_vals, decimals=3))) - + # Test the fit p values are equal to those given in He and Legendre + # 2002 + # I am rounding to the nearest whole number, those I have confirmed + # that the decimals are very close too + he_values = np.round([205.9878, 410.9853, 794.7613, 1210.0497, 1945.9970, + 3193.8362], decimals=0) + he_ks = [2, 1, 0.5, 0.3, 0.1363, 0.01] + tnbd = nbd_lt(tot_obs=335356, n_samp=814, k=he_ks) + tnbd.pmf(1) + pred = np.round(tnbd.var['p'], decimals=0) + print pred + print he_values + self.assertTrue(np.array_equal(he_values, pred)) # Test that fixing the bias leads to the proper mean ks = np.linspace(.01, 5, num=100) @@ -465,7 +458,7 @@ def test_nbd_lt(self): ob = nbd_lt(tot_obs=500, n_samp=20, k=k) pred_vals = ob.pmf(vals)[0] bmean = sum(vals * pred_vals) - self.assertTrue(np.round(bmean, decimals=1) == 500 / 20.) + self.assertTrue(np.round(bmean, decimals=0) == 500 / 20.) 
def test_fnbd(self): From f46ffa5a755ad6104eed38a6aa11642a24d1db8e Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 11 Sep 2013 14:56:54 -0700 Subject: [PATCH 010/343] Deleted a few extra comments in nbd_lt --- distributions.py | 33 ++++++--------------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/distributions.py b/distributions.py index fd31a47..1523547 100644 --- a/distributions.py +++ b/distributions.py @@ -1737,8 +1737,8 @@ def pmf(self, n): pmf = [] for tn_samp, ttot_obs, tn in zip(n_samp, tot_obs, n): ttot_obs = np.round(ttot_obs, decimals=0) - sumg = sum(eq(np.arange(1, np.floor(ttot_obs) + 1), tn_samp, ttot_obs)) - tpmf = eq(tn, tn_samp, ttot_obs) / sumg # Normalizing + #sumg = sum(eq(np.arange(1, np.floor(ttot_obs) + 1), tn_samp, ttot_obs)) + tpmf = eq(tn, tn_samp, ttot_obs) #/ sumg # Normalizing pmf.append(tpmf) return pmf @@ -2165,11 +2165,7 @@ class nbd_lt(Distribution): The total species (S) is equivalent to n_samp and the total individuals (N) is equivalent to tot_obs. - Parameterization based on Sampford 1955 - - The mean tends to be larger - than expected. This method uses brute force to correct for the bias so - that the mean of the distribution is correct for small k. + Parameterization based on Sampford 1955 and He and Legendre 2002 ''' @@ -2181,7 +2177,7 @@ def __init__(self, **kwargs): self.par_num = 2 self.var = {} - def pmf(self, n, fix_bias=False, vals=1e3): + def pmf(self, n): """ Probability mass function method. @@ -2190,10 +2186,6 @@ def pmf(self, n, fix_bias=False, vals=1e3): n : int, float or array-like object Values at which to calculate pmf. May be a list of same length as parameters, or single iterable. - vals : float - Creates a vector np.arange(1, vals + 1) to correct the bias. A - higher vals will mean a more precise correction but slower run - time. Returns ------- @@ -2214,24 +2206,18 @@ def pmf(self, n, fix_bias=False, vals=1e3): # Calculate pmf def pmf_eq(n, p, k): - #om = (1 / (1 + (p))); eta = 1 - om norm = np.exp(spec.gammaln(k + n) - ((spec.gammaln(k) + spec.gammaln(n + 1)))) kernel = (p / (1 + p))**n * (1 / ((1 + p)**k - 1)) - #kernel = (om**k / (1 - om**k)) * (eta**n) return norm * kernel - #nt_mu = tot_obs / n_samp # Non_truncated mu self.var['p'] = [] pmf = [] - #nums = np.arange(1, vals + 1) p_eq = lambda p, k, N, S : (k * p) / (1 - (1 + p)**-k) -\ (float(N) / S) - #bias_eq = lambda m, ks, temp_mu: sum(nums * pmf_eq(nums, m, ks)) -\ - # temp_mu for tn_samp, ttot_obs, tk, tn in zip(n_samp, tot_obs, k, n): # Find p @@ -2259,7 +2245,7 @@ def pmf_eq(n, p, k): self.var['p'] = np.array(self.var['p']) return pmf - def cdf(self, n, vals=1e3): + def cdf(self, n): ''' Cumulative distribution method. @@ -2268,10 +2254,6 @@ def cdf(self, n, vals=1e3): n : int, float or array-like object Values at which to calculate cdf. May be a list of same length as parameters, or single iterable. - vals : float - Creates a vector np.arange(1, vals + 1) to calculate mu. A - higher vals will mean a more precise correction but slower run - time. Returns ------- @@ -2303,7 +2285,7 @@ def cdf(self, n, vals=1e3): return cdf - def fit(self, data, guess_for_k=1, vals=1e3): + def fit(self, data, guess_for_k=1): ''' Fit method. @@ -2317,9 +2299,6 @@ def fit(self, data, guess_for_k=1, vals=1e3): data array, must be in a list with one element. guess_for_k : float Initial guess for parameter k in solver - vals : float - Creates a vector np.arange(1, vals + 1) to estimate mu. A - higher vals will mean a more precise mu, but a slower run time. 
See class docstring for more specific information on this distribution. ''' From 020b0659376006b96351bc023a80bfd5c13f0f8a Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Mon, 30 Sep 2013 21:55:44 -0700 Subject: [PATCH 011/343] Made nu discrete and removed Jacobian. Added omega and TSED to macroeco --- distributions.py | 301 +++++++++++++++++++++++++++++++++++------------ empirical.py | 42 +++++++ 2 files changed, 267 insertions(+), 76 deletions(-) diff --git a/distributions.py b/distributions.py index 1523547..5490ba0 100644 --- a/distributions.py +++ b/distributions.py @@ -3624,12 +3624,8 @@ class nu(Distribution): self.var keywords ----------------- - beta : list of floats - The beta lagrange multiplier lambda_2 : list of floats The lambda2 lagrange multiplier - sigma : list of floats - The sigma lagrange multiplier Notes ----- @@ -3660,36 +3656,23 @@ def pmf(self, e): n_samp, tot_obs, E = self.get_params(['n_samp', 'tot_obs', 'E']) e = expand_n(e, len(n_samp)) - start = 0.3 - stop = 2 - flmax = sys.float_info[0] pmf = [] - self.var['beta'] = [] self.var['lambda_2'] = [] + convert_e = lambda ep, l2: 1 / (l2 * (ep - 1)) + for tn_samp, ttot_obs, tE, te in zip(n_samp, tot_obs, E, e): - k = np.linspace(1, ttot_obs, num=ttot_obs) - try: - tx = scipy.optimize.brentq(beta_solver, start, - min((flmax/tn_samp)**(1/float(ttot_obs)), stop), - args = (k, ttot_obs, tn_samp), disp=True) - except(ValueError): - raise ValueError("No solution to %s.pmf for tot_obs = %.2f" - % (self.__class__.__name__, ttot_obs) + - " and n_samp = %.2f" % (tn_samp)) # Set lagrange multipliers - tbeta = -np.log(tx) tl2 = float(tn_samp) / (tE - ttot_obs) # Harte (2011) 7.26 e_max = 1 + (1 / tl2) e_min = 1 + (1 / (ttot_obs * tl2)) - norm = integrate.quad(nu_pmf_eq, e_min, e_max, (tbeta, tl2, - tn_samp))[0] tpmf = np.empty(len(te), dtype=float) + tns = np.ceil(convert_e(te, tl2)) - # Parse values that aren't in range as set to zero + # Parse values that aren't in range and set to zero ind_tot = np.arange(len(tpmf)) ind_less = np.where(te >= e_min)[0] ind_more = np.where(te <= e_max)[0] @@ -3699,11 +3682,10 @@ def pmf(self, e): tpmf[ind_exclude] = 0 if len(ind_include) != 0: - tpmf[ind_include] =\ - nu_pmf_eq(te[ind_include], tbeta, tl2, tn_samp) / norm + tpmf[ind_include] = logser_ut(tot_obs=ttot_obs, + n_samp=tn_samp).pmf(tns[ind_include])[0] pmf.append(tpmf) - self.var['beta'].append(tbeta) self.var['lambda_2'].append(tl2) return pmf @@ -3714,67 +3696,236 @@ def cdf(self, e): n_samp, tot_obs, E = self.get_params(['n_samp', 'tot_obs', 'E']) e = expand_n(e, len(n_samp)) - start = 0.3 - stop = 2 - flmax = sys.float_info[0] cdf = [] self.var['beta'] = [] self.var['lambda_2'] = [] + convert_n = lambda n, l2: 1 + (1 / (n * l2)) + for tn_samp, ttot_obs, tE, te in zip(n_samp, tot_obs, E, e): - k = np.linspace(1, ttot_obs, num=ttot_obs) - try: - tx = scipy.optimize.brentq(beta_solver, start, - min((flmax/tn_samp)**(1/float(ttot_obs)), stop), - args = (k, ttot_obs, tn_samp), disp=True) - except(ValueError): - raise ValueError("No solution to %s.pmf for tot_obs = %.2f" - % (self.__class__.__name__, ttot_obs) + - " and n_samp = %.2f" % (tn_samp)) - # Set lagrange multipliers - tbeta = -np.log(tx) tl2 = float(tn_samp) / (tE - ttot_obs) # Harte (2011) 7.26 - e_max = 1 + (1 / tl2) - e_min = 1 + (1 / (ttot_obs * tl2)) - tcdf = np.empty(len(te), dtype=float) + # Set all e so you can sum + all_e = convert_n(np.arange(1, ttot_obs + 1), tl2)[::-1] + + pmf_for_all_e = nu(tot_obs=ttot_obs, n_samp=tn_samp, + E=tE).pmf(all_e)[0] + cum_sum = 
np.cumsum(pmf_for_all_e) + + tcdf = np.array([cum_sum[np.sum(e_val >= all_e) - 1] if sum(e_val + >= all_e) - 1 != -1 else 0 for e_val in te]) + + cdf.append(tcdf) + self.var['lambda_2'].append(tl2) + + return cdf + + def rad(self): + ''' + This rad uses the observed cdf for a given nu distribution and the + predicted cdf to calculate the rank energy distribution. + + Returns + ------- + : list + A list of rank energy distributions + + ''' + + n_samp, tot_obs, E = self.get_params(['n_samp', 'tot_obs', 'E']) + rad = [] + + convert_n = lambda n, l2: 1 + (1 / (n * l2)) + + for tn_samp, ttot_obs, tE in zip(n_samp, tot_obs, E): + + # Set temp params + self.params['n_samp'] = tn_samp + self.params['ttot_obs'] = ttot_obs + self.params['E'] = tE + + tl2 = float(tn_samp) / (tE - ttot_obs) # Harte (2011) 7.26 + all_e = convert_n(np.arange(1, ttot_obs + 1), tl2)[::-1] + tpmf = self.pmf(all_e)[0] + tcdf = np.cumsum(tpmf) - # Parse values that aren't in range as set to 0 or 1 - ind_tot = np.arange(len(tcdf)) - ind_less = np.where(te < e_min)[0] - ind_more = np.where(te > e_max)[0] - ind_combo = np.concatenate((ind_more, ind_less)) - ind_include = np.array(list(set(ind_tot) - set(ind_combo))) + # Observed cdf. Not quite true if some energies overlap + obs_cdf = np.arange(1 / (2 * (tn_samp)), 1, 1/tn_samp) + + trad = [all_e[sum(oc >= tcdf) - 1] if sum(oc >= tcdf) - 1 != -1 + else all_e[0] for oc in obs_cdf] + + rad.append(trad) + + self.params['n_samp'] = n_samp + self.params['ttot_obs'] = tot_obs + self.params['E'] = E + + return rad + + + def fit(self, data): + ''' + Fit the average species energy distribution to data + + Parameters + ---------- + data : list of tuples - if len(ind_less) != 0: - tcdf[ind_less] = 0 - if len(ind_more) != 0: - tcdf[ind_more] = 1 + A list containing tuples of length two or a list containing tuples + of length three. If the tuples are of length two, the first object + in a tuple is an iterable containing the community individual energy + distribution. The second object in a tuple is an iterable + containing the empirical species abundance distribution. If the + tuples are of length three, the first object in the tuple is an + iterable containing the average energy distribution. The second object + in a tuple an iterable containing the community individual energy + distribution. The third object in a tuple is an iterable + containing the empirical species abundance distribution. + + ''' - norm = integrate.quad(nu_pmf_eq, e_min, e_max, (tbeta, tl2, - tn_samp))[0] + # Unpack the list of tuples + # Can either take + if len(data[0]) == 2: + ied, sad = unpack(data) + elif len(data[0]) == 3: + ased, ied, sad = unpack(data) + + # Use base class fit + super(nu, self).fit(sad) + + # Format and check energy data + data_eng = check_list_of_iterables(ied) + + # Store energy data in self.params + E = [np.sum(np.array(edata)) for edata in data_eng] + self.params['E'] = E + + return self + +class omega(Distribution): + """ + This distribution is the distribution of total energy within a species + across all species. The means of this distribution is E / S. 
+ + Parameters + ---------- + n_samp : int or iterable + Total number of species / samples + tot_obs: int or iterable + Total number of individuals / observations + E : int or iterable + Total energy output of community + + self.var keywords + ----------------- + lambda_2 : list of floats + The lambda2 lagrange multiplier + emaxmin : list fo tuples + Each tuple contains the max total energy and min total energy for the + given state variables. + + Notes + ----- + This is a discrete distribution. + + + """ + + def pmf(self, e): + ''' + Notes + ----- + The omega distribution is only defined at e values given by + e = n + (1 / lambda2). While this function will return a pmf + value for all e greater than or equal to one, note that the pmf will + only sum to one when provided with the proper support. lambda2 can be + calculated by the equation: n_samp / (E - tot_obs) or S / (E - N) + + + ''' + + n_samp, tot_obs, E = self.get_params(['n_samp', 'tot_obs', 'E']) + e = expand_n(e, len(n_samp)) + + pmf = [] + self.var['lambda_2'] = [] + self.var['emaxmin'] = [] + + convert_e = lambda ep, l2: ep - (1 / l2) + + for tn_samp, ttot_obs, tE, te in zip(n_samp, tot_obs, E, e): + + # Set lagrange multipliers + tl2 = float(tn_samp) / (tE - ttot_obs) # Harte (2011) 7.26 + e_max = ttot_obs + (1 / tl2) + e_min = 1 + (1 / tl2) + + tpmf = np.empty(len(te), dtype=float) + tns = convert_e(te, tl2) + + # Parse values that aren't in range and set to zero + ind_tot = np.arange(len(tpmf)) + ind_less = np.where(te >= e_min)[0] + ind_more = np.where(te <= e_max)[0] + ind_include = np.intersect1d(ind_more, ind_less) + ind_exclude = np.array(list(set(ind_tot) - set(ind_include))) + if len(ind_exclude) != 0: + tpmf[ind_exclude] = 0 + if len(ind_include) != 0: - tcdf[ind_include] = np.array([integrate.quad(nu_pmf_eq, e_min, se, - (tbeta, tl2, tn_samp))[0] / norm for se in - te[ind_include]]) + tpmf[ind_include] = logser_ut(tot_obs=ttot_obs, + n_samp=tn_samp).pmf(tns[ind_include])[0] + + pmf.append(tpmf) + self.var['lambda_2'].append(tl2) + self.var['emaxmin'].append((e_max, e_min)) + + return pmf + + @doc_inherit + def cdf(self, e): + + n_samp, tot_obs, E = self.get_params(['n_samp', 'tot_obs', 'E']) + e = expand_n(e, len(n_samp)) + + + cdf = [] + self.var['lambda_2'] = [] + + convert_n = lambda n, l2: n + (1 / l2) + + for tn_samp, ttot_obs, tE, te in zip(n_samp, tot_obs, E, e): + + tl2 = float(tn_samp) / (tE - ttot_obs) # Harte (2011) 7.26 + + # Set all e so you can sum + all_e = convert_n(np.arange(1, ttot_obs + 1), tl2) + + pmf_for_all_e = omega(tot_obs=ttot_obs, n_samp=tn_samp, + E=tE).pmf(all_e)[0] + cum_sum = np.cumsum(pmf_for_all_e) + + tcdf = np.array([cum_sum[np.sum(e_val >= all_e) - 1] if sum(e_val + >= all_e) - 1 != -1 else 0 for e_val in te]) cdf.append(tcdf) - self.var['beta'].append(tbeta) self.var['lambda_2'].append(tl2) return cdf - def rad(self, tol=.1): + def rad(self): ''' - This rad uses the observed cdf for a given nu distribution and the + This rad uses the observed cdf for a given omega distribution and the predicted cdf to calculate the rank energy distribution. Parameter ---------- tol : float - Precision interval. The integral of nu is approximated at the + Precision interval. The integral of omega is approximated at the interval tol. Smaller intervals can be more precise, but a tol between 0.1 and 0.5 is more effecient and the results are changed only marginally. 
@@ -3789,6 +3940,8 @@ def rad(self, tol=.1): n_samp, tot_obs, E = self.get_params(['n_samp', 'tot_obs', 'E']) rad = [] + convert_n = lambda n, l2: n + (1 / l2) + for tn_samp, ttot_obs, tE in zip(n_samp, tot_obs, E): # Set temp params @@ -3797,19 +3950,15 @@ def rad(self, tol=.1): self.params['E'] = tE tl2 = float(tn_samp) / (tE - ttot_obs) # Harte (2011) 7.26 - e_max = 1 + (1 / tl2) - e_min = 1 + (1 / (ttot_obs * tl2)) - - num = np.round((e_max - e_min) / tol, decimals=0) - eng = np.linspace(e_min, e_max + tol, num=num) - diff = eng[1] - eng[0] - - tcdf = np.cumsum(diff * self.pmf(eng)[0]) + all_e = convert_n(np.arange(1, ttot_obs + 1), tl2) + tpmf = self.pmf(all_e)[0] + tcdf = np.cumsum(tpmf) # Observed cdf. Not quite true if some energies overlap obs_cdf = np.arange(1 / (2 * (tn_samp)), 1, 1/tn_samp) - trad = [eng[sum(oc >= tcdf) - 1] for oc in obs_cdf] + trad = [all_e[sum(oc >= tcdf) - 1] if sum(oc >= tcdf) - 1 != -1 + else all_e[0] for oc in obs_cdf] rad.append(trad) @@ -3830,14 +3979,14 @@ def fit(self, data): A list containing tuples of length two or a list containing tuples of length three. If the tuples are of length two, the first object - in a tuple is an iterable containing the community individual energy - distribution. The second object in a tuple is an iterable + in a tuple is an iterable containing the community individual + energy distribution. The second object in a tuple is an iterable containing the empirical species abundance distribution. If the tuples are of length three, the first object in the tuple is an - iterable containing the average energy distribution. The second object - in a tuple an iterable containing the community individual energy - distribution. The third object in a tuple is an iterable - containing the empirical species abundance distribution. + iterable containing the total species energy distribution. The + second object in a tuple an iterable containing the community + individual energy distribution. The third object in a tuple is an + iterable containing the empirical species abundance distribution. ''' @@ -3846,7 +3995,7 @@ def fit(self, data): if len(data[0]) == 2: ied, sad = unpack(data) elif len(data[0]) == 3: - ased, ied, sad = unpack(data) + tsed, ied, sad = unpack(data) # Use base class fit super(nu, self).fit(sad) diff --git a/empirical.py b/empirical.py index 520e472..aab57d7 100644 --- a/empirical.py +++ b/empirical.py @@ -19,6 +19,7 @@ - `sed` -- calculate species energy distribution (grid or sample) - `ied` -- calculate the community (individual) energy distribution - `ased` -- calculate the average species energy distribution +- `tsed` -- calculate the total species energy distribution - `get_sp_centers` -- - 'get_div_areas' -- return list of areas made by div_list @@ -734,6 +735,47 @@ def ased(self, criteria, normalize=True, exponent=0.75): return result + def tsed(self, criteria, normalize=True, exponent=0.75): + ''' + Calculates the total species energy distribution for each given + species in a subset. + + Parameters + ---------- + criteria : dict + Dictionary must have contain a key with the value 'energy' or + 'mass'. See sad method for further requirements. + + Returns + ------- + result : list + List of tuples containing results, where the first element is a + dictionary of criteria for this calculation and second element is a + 1D ndarray of length species containing the average energy for each + species. 
The third element is 1D array listing identifiers for + species in the same order as they appear in the second element of + result. + + ''' + + sed = self.sed(criteria, normalize=normalize, exponent=exponent) + + result = [] + for this_sed in sed: + spp_list = list(this_sed[1].viewkeys()) + spp_list.sort() + + # Take the mean energy for each species + omega = [np.sum(this_sed[1][spp]) for spp in spp_list if + len(this_sed[1][spp]) != 0] + # Truncated spp_list if necessary + spp_list = [spp for spp in spp_list if len(this_sed[1][spp]) != 0] + + result.append((this_sed[0], np.array(omega), np.array(spp_list))) + + return result + + def flatten_sad(sad): ''' Takes a list of tuples, like sad output, ignores keys, and converts values From a2012cd773052c059fb5279007981d2b6d9ec121 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sat, 5 Oct 2013 12:03:12 -0700 Subject: [PATCH 012/343] Changed vals parameter --- distributions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/distributions.py b/distributions.py index 1523547..74370b1 100644 --- a/distributions.py +++ b/distributions.py @@ -2154,8 +2154,8 @@ class nbd_lt(Distribution): self.var keywords ----------------- - mu : array of floats - mu parameters of nbd_lt + p : array of floats + p parameters of nbd_lt k : array of floats Aggregation parameter k is included in self.var if it is calculated in fit. @@ -2274,7 +2274,7 @@ def cdf(self, n): max_n = [np.max(tn) for tn in n] n_in = [np.arange(self.min_supp, i + 1) for i in max_n] - pmf_list = self.pmf(n_in, vals=vals) + pmf_list = self.pmf(n_in) # Calculate cdfs cdf = [] @@ -2315,7 +2315,7 @@ def nll_nb(k): self.params['tot_obs'] = ttot_obs self.params['n_samp'] = tn_samp self.params['k'] = k - return -sum(np.log(self.pmf(tdata, vals=vals)[0])) + return -sum(np.log(self.pmf(tdata)[0])) mlek = scipy.optimize.fmin(nll_nb, np.array([guess_for_k]), disp=0)[0] From 80a55277571db5c7673697933fc8709a2c3c0e73 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 22 Jan 2014 15:35:14 -0800 Subject: [PATCH 013/343] Fix ff of empirical to unix line endings, remove trailing whitespace --- empirical.py | 1622 +++++++++++++++++++++++++------------------------- 1 file changed, 811 insertions(+), 811 deletions(-) diff --git a/empirical.py b/empirical.py index 520e472..63e2b44 100644 --- a/empirical.py +++ b/empirical.py @@ -1,811 +1,811 @@ -#!/usr/bin/python - -''' -Calculating macroecological metrics for empirical or theoretical patch. Patch -is interpreted broadly as any temporally and spatially defined census. 
- -Classes -------- -- `Patch` -- empirical metrics for census data - -Patch Methods -------------- -- `sad` -- calculate species abundance distribution (grid or sample) -- `sar` -- calculate species-area relationship (grid or sample) -- `universal_sar` -- calculates the universal sar curve -- `ear` -- calculate endemics-area relationship (grid or sample) -- `comm` -- calculate commonality between sub-patches (grid) -- `ssad` -- calculate species-level spatial abundance distrib (grid or sample) -- `sed` -- calculate species energy distribution (grid or sample) -- `ied` -- calculate the community (individual) energy distribution -- `ased` -- calculate the average species energy distribution - -- `get_sp_centers` -- -- 'get_div_areas' -- return list of areas made by div_list - -Misc functions --------------- -- `distance` -- return Euclidean distance between two points -''' - -from __future__ import division -import numpy as np -from math import radians, cos, sin, asin, sqrt -import itertools -from copy import deepcopy -from data import DataTable - - -class Patch: - ''' - An object representing an empirical census. - - Parameters - ---------- - data_path : str - Path to csv file containing census data. - subset : dict or str - Dictionary of permanent subset to data, {'column_name': 'condition'}, - which will limit all analysis to records in which column_name meets the - condition, ie, {'year': ('==', 2005), 'x': [('>', 20), ('<', 40)]} - restricts analysis to year 2005 and x values between 20 and 40. These - conditions can also be passed to the individual methods, but subsetting - the data table up front may save analysis time. Subsetting on a string - would look something like {'name' : [('==', 'John'), ('==', 'Harry')]}. - In addition, subset can be a query string for a SQL database. - - Attributes - ---------- - data_table : object of class DataTable - Object containing patch data and metadata. - - ''' - - def __init__(self, datapath, subset = {}): - '''Initialize object of class Patch. See class documentation.''' - - # Handle csv - self.data_table = DataTable(datapath, subset=subset) - - # If datapath is sql or db the subsetting is already done. - if type(subset) == type({}): - self.data_table.table = self.data_table.get_subtable(subset) - - - def sad(self, criteria, clean=False): - ''' - Calculates an empirical species abundance distribution given criteria. - - Parameters - ---------- - criteria : dict - Dictionary of form {column_name: value}. Must contain a key with a - value of 'species' indicating the column with species identifiers - (this column must be type categorical in metadata). If a column - giving the counts of species found at a point is also in the data, - a key with the value 'count' should also be given. - - Value has a different meaning depending on column type: - - metric - number of divisions of data along this axis, int/float - - categorical - 'split' calculates each category separately, - 'whole' takes the entire column. - clean : bool - If True, all the zeros are removed from the sads. If False, sads - are left as is. - - Returns - ------- - result : list - List of tuples containing results, where the first element is a - dictionary of criteria for this calculation and second element is a - 1D ndarray of length species containing the abundance for each - species. The third element is 1D array listing identifiers for - species in the same order as they appear in the second element of - result. 
- ''' - - spp_list, spp_col, count_col, engy_col, mass, combinations = \ - self.parse_criteria(criteria) - - if spp_col == None: - raise TypeError('No species column specified in "criteria" ' + - 'parameter') - result = [] - for comb in combinations: - - subtable = self.data_table.get_subtable(comb) - - sad_list = [] - for species in spp_list: - spp_subtable = subtable[subtable[spp_col] == species] - if count_col: - count = np.sum(spp_subtable[count_col]) - else: - count = len(spp_subtable) - sad_list.append(count) - - sad_list = np.array(sad_list) - - if clean: - ind = np.where(sad_list != 0)[0] - sad_list = sad_list[ind] - temp_spp_list = spp_list[ind] - else: - temp_spp_list = spp_list - - - result.append((comb, sad_list, temp_spp_list)) - - return result - - def ssad(self, criteria): - ''' - Calculates empirical species-level spatial abundance distributions - given criteria. - - Parameters - ---------- - criteria : dict - See Patch.sad docstring - - Returns - ------- - : tuple - Returns a tuple with two objects. The first object is an array of - dicts that correspond to the criteria used to generate each cell. - The length of the first object in equal to the number of divisions - specified. The second object is a dictionary that has length - species and each keyword is a species. Each species keyword looks - up an array with the ssad for the given species. The array that - each keyword looks up is the same length as criteria. - - - ''' - sad_return = self.sad(criteria, clean=False) - spp_list = sad_return[0][2] - combs, array_res = flatten_sad(sad_return) - ssad = {} - - for i, spp in enumerate(spp_list): - ssad[spp] = array_res[i,:] - - return combs, ssad - - def parse_criteria(self, criteria): - ''' - Parses criteria list to get all possible column combinations. - - Parameters - ---------- - criteria : dict - (See docstring for Patch.sad) - energy : bool - If False, does not return an energy column, if True, returns an - energy column. - - Returns - ------- - spp_list : ndarray - 1D array listing identifiers for species in the same order as they - appear in arrays found in result. - spp_col : str - Name of column containing species identifiers. - count_col : str - Name of column containing counts, if any. - combinations : list of dicts - List of dictionaries giving all possible combinations of criteria. - Columns not mentioned in criteria are ignored and will be averaged - over in later analyses. - - ''' - - spp_list = None - spp_col = None - count_col = None - engy_col = None - mass_col = None - combinations = [] - - # Calculate all possible combinations of columns based on criteria - # TODO: Add error checking - for key, value in criteria.items(): - - # Look for two special values indicating species and count cols - if value == 'species': - spp_list = np.unique(self.data_table.table[key]) - spp_col = key - continue - if value == 'count': - count_col = key - continue - if value == 'energy': - engy_col = key - continue - if value == 'mass': - mass_col = key - continue - - # Get levels of categorial or metric data - if value == 'split': # Categorial - levels = np.unique(self.data_table.table[key]) - levels_str = [('==' , x.astype(levels.dtype)) for x in levels] - elif value == 'whole': - # Random string to minimize chance of overlap? - levels_str = [('==','whole')] - else: # Metric - - # TODO: Throw a warning if the data is not divisible by the - # divisions specified. 
- try: - dmin = self.data_table.meta[(key, 'minimum')] - dmax = self.data_table.meta[(key, 'maximum')] - dprec = self.data_table.meta[(key, 'precision')] - - # TODO: Error if step < prec - step = (dmax + dprec - dmin) / value - starts = np.arange(dmin, dmax + dprec, step) - ends = starts + step - except TypeError: - raise TypeError('Unable to proceed to with values ' + - 'obtained from metadata. Please check ' + - 'the metadata file and/or parameters file') - - - starts_str = [('>=', x) for x in starts] - ends_str = [('<', x) for x in ends] - levels_str = [list(lvl) for lvl in zip(starts_str, ends_str)] - - - # Add these levels to combinations dictionary - if len(combinations) == 0: # If first criteria - for i, level in enumerate(levels_str): - combinations.append({key: level}) - else: - temp_comb = [] - for i, level in enumerate(levels_str): - exist_recs = deepcopy(combinations) - for rec in exist_recs: - rec[key] = level - temp_comb += exist_recs - combinations = temp_comb - - if len(combinations) == 0: - combinations.append({}) - - return spp_list, spp_col, count_col, engy_col, mass_col, combinations - - - - def sar(self, div_cols, div_list, criteria, form='sar', output_N=False): - ''' - Calculate an empirical species-area relationship given criteria. - - Parameters - ---------- - div_cols : tuple - Column names to divide, eg, ('x', 'y'). Must be metric. - div_list : list of tuples - List of division pairs in same order as div_cols, eg, [(2,2), - (2,4), (4,4)]. Values are number of divisions of div_col. - criteria : dict - See docstring for EPatch.sad. Here, criteria SHOULD NOT include - items referring to div_cols (if there are any, they are ignored). - form : string - 'sar' or 'ear' for species or endemics area relationship. EAR is - relative to the subtable selected after criteria is applied. - output_N : bool - Adds the column N to the output rec array which contains the - average N for a given area. - - Returns - ------- - rec_sar: structured array - Returns a structured array with fields 'items' and 'area' that - contains the average items/species for each given area specified by - critieria. - full_result : list of ndarrays - List of same length as areas containing arrays with element for - count of species or endemics in each subpatch at corresponding - area. 
- ''' - - # If any element in div_cols in criteria, remove from criteria - criteria = {k: v for k, v in criteria.items() if k not in div_cols} - - # Loop through div combinations (ie, areas), calc sad, and summarize - areas = [] - mean_result = [] - full_result = [] - N_result = [] - - for div in div_list: - - # Add divs to criteria dict - this_criteria = deepcopy(criteria) - for i, col in enumerate(div_cols): - this_criteria[col] = div[i] - - # Get flattened sad for all criteria and this div - sad_return = self.sad(this_criteria) - - if output_N: - N_result.append(np.mean([sum(sad[1]) for sad in sad_return])) - - flat_sad = flatten_sad(sad_return)[1] - - # Store results - if form == 'sar': - this_full = np.sum((flat_sad > 0), axis=0) - this_mean = np.mean(this_full) - elif form == 'ear': - totcnt = np.sum(flat_sad, axis=1) - totcnt_arr = \ - np.array([list(totcnt),]*np.shape(flat_sad)[1]).transpose() - - this_full = np.sum(np.equal(flat_sad, totcnt_arr), axis=0) - this_mean = np.mean(this_full) - else: - raise NotImplementedError('No SAR of form %s available' % form) - - full_result.append(this_full) - mean_result.append(this_mean) - - # Store area - area = 1 - for i, col in enumerate(div_cols): - dmin = self.data_table.meta[(col, 'minimum')] - dmax = self.data_table.meta[(col, 'maximum')] - dprec = self.data_table.meta[(col, 'precision')] - length = (dmax + dprec - dmin) - - area *= length / div[i] - - areas.append(area) - - # Return - if not output_N: - rec_sar = np.array(zip(mean_result, areas), dtype=[('items', - np.float), ('area', np.float)]) - else: - rec_sar = np.array(zip(mean_result, N_result, areas), - dtype=[('items', np.float), ('N', np.float), ('area', np.float)]) - - return rec_sar, full_result - - - def universal_sar(self, div_cols, div_list, criteria, include_full=False): - ''' - Calculates the empirical universal sar given criteria. The universal - sar calculates the slope of the SAR and the ratio of N / S at all - the areas in div_cols (where N is the total number of species and S is - the total number of species). - - This function assumes that the div_list contains halvings. If they are not, - the function will still work but the results will be meaningless. An - example a of div_list with halvings is: - - [(1,1), (1,2), (2,2), (2,4), (4,4)] - - Parameters - ---------- - div_cols : tuple - Column names to divide, eg, ('x', 'y'). Must be metric. - div_list : list of tuples - List of division pairs in same order as div_cols, eg, [(2,2), - (2,4), (4,4)]. Values are number of divisions of div_col. - criteria : dict - See docstring for EPatch.sad. Here, criteria SHOULD NOT include - items referring to div_cols (if there are any, they are ignored). - include_full : bool - If include_full = True, the division (1,1) will be included if it - was now already included. Else it will not be included. (1,1) is - equivalent to the full plot - - - Returns - ------- - z_array : a structured array - Has the columns names: - 'z' : slope of the SAR at the given area - 'S' : Number of species at the given division - 'N' : Number of individuals at the given division - 'N/S' : The ratio of N/S at the given division - - - Notes - ----- - If you give it n divisions in div_list you will get a structured array - back that has length n - 2. 
Therefore, if you only have one - ''' - - # If (1,1) is not included, include it - if include_full: - try: - div_list.index((1,1)) - except ValueError: - div_list.insert(0, (1,1)) - - # Run sar with the div_cols - sar = self.sar(div_cols, div_list, criteria, output_N=True)[0] - - # sort by area - sar = np.sort(sar, order=['area'])[::-1] - - # Calculate z's - if len(sar) >= 3: # Check the length of sar - z_list = [z(sar['items'][i - 1], sar['items'][i + 1]) for i in - np.arange(1, len(sar)) if sar['items'][i] != sar['items'][-1]] - else: - return np.empty(0, dtype=[('z', np.float), ('S', np.float), ('N', - np.float), ('N/S', np.float)]) - - N_over_S = sar['N'][1:len(sar) - 1] / sar['items'][1:len(sar) - 1] - - z_array = np.array(zip(z_list, sar['items'][1:len(sar) - 1], - sar['N'][1:len(sar) - 1], N_over_S), dtype=[('z', np.float), ('S', - np.float), ('N', np.float), ('N/S', np.float)]) - - return z_array - - def comm_sep(self, plot_locs, criteria, loc_unit=None): - ''' - Calculates commonality (Sorensen and Jaccard) between pairs of plots. - - Parameters - ---------- - plot_locs : dict - Dictionary with keys equal to each plot name, which must be - represented by a column in the data table, and values equal to a - tuple of the x and y coordinate of each plot - criteria : dict - See docstring for Patch.sad. - loc_unit : str - Unit of plot locations. Special cases include 'decdeg' (decimal - degrees), returns result in km. Otherwise ignored. - - Returns - ------- - result: structured array - Returns a structured array with fields plot-a and plot-b (names of - two plots), dist (distance between plots), and sorensen and jaccard - (similarity indices). Has row for each unique pair of plots. - ''' - - # Set up sad_dict with key=plot and val=clean sad for that plot - sad_dict = {} - - # Loop through all plot cols, updating criteria, and getting spp_list - for plot in plot_locs.keys(): - - # Find current count col and remove it from criteria - for crit_key in criteria.keys(): - if criteria[crit_key] == 'count': - criteria.pop(crit_key, None) - - # Add this plot as col with counts - criteria[plot] = 'count' - - # Get SAD for existing criteria with this plot as count col - sad_return = self.sad(criteria, clean=True) - - # Check that sad_return only has one element, or throw error - if len(sad_return) > 1: - raise NotImplementedError('Too many criteria for comm_sep') - - # Get unique species list for this plot and store in sad_dict - sad_dict[plot] = sad_return[0][2] - - # Set up recarray to hold Sorensen index for all pairs of plots - n_pairs = np.sum(np.arange(len(plot_locs.keys()))) - result = np.recarray((n_pairs,), dtype=[('plot-a','S32'), - ('plot-b', 'S32'), - ('spp-a', int), - ('spp-b', int), - ('dist', float), - ('sorensen', float), - ('jaccard', float)]) - - # Loop through all combinations of plots and fill in result table - row = 0 - for pair in itertools.combinations(plot_locs.keys(), 2): - - # Names of plots - plota = pair[0] - plotb = pair[1] - - result[row]['plot-a'] = plota - result[row]['plot-b'] = plotb - - # Calculate inter-plot distance - if loc_unit == 'decdeg': - result[row]['dist'] = decdeg_distance(plot_locs[plota], - plot_locs[plotb]) - else: - result[row]['dist'] = distance(plot_locs[plota], - plot_locs[plotb]) - - # Get similarity indices - spp_a = len(sad_dict[plota]) - spp_b = len(sad_dict[plotb]) - - result[row]['spp-a'] = spp_a - result[row]['spp-b'] = spp_b - - intersect = set(sad_dict[plota]).intersection(sad_dict[plotb]) - union = 
set(sad_dict[plota]).union(sad_dict[plotb]) - - # Fill in zero if denom is zero - if spp_a + spp_b == 0: - result[row]['sorensen'] = 0 - else: - result[row]['sorensen'] = (2*len(intersect)) / (spp_a+spp_b) - - if len(union) == 0: - result[row]['jaccard'] = 0 - else: - result[row]['jaccard'] = len(intersect) / len(union) - - # Increment row counter - row += 1 - - return result - - - def ied(self, criteria, normalize=True, exponent=0.75): - ''' - Calculates the individual energy distribution for the entire community - given the criteria - - Parameters - ---------- - criteria : dict - Dictionary must have contain a key with the value 'energy'. See - sad method for further requirements. - normalize : bool - If True, this distribution is normalized by dividing by the lowest - energy value within each element of criteria. If False, returns raw - energy values. - exponent : float - The exponent of the allometric scaling relationship if energy is - calculated from mass. - - Returns - ------- - result : list - List of tuples containing results, where first element is - dictionary of criteria for this calculation and second element is a - 1D ndarray containing the energy measurement of each individual in - the subset. The third element is the full (not unique) species - list for the given criteria. - - Notes - ----- - If count_col is None or is all ones, the entire energy column for each - subtable is returned. Else, the average energy per individual, - repeated for each individual is returned. This is equivalent to the psi - distribution from Harte (2011). - - - ''' - - spp_list, spp_col, count_col, engy_col, mass_col, combinations = \ - self.parse_criteria(criteria) - - if engy_col == None and mass_col == None: - raise ValueError("No energy or mass column given") - elif engy_col == None and mass_col != None: - mass = True - this_engy = mass_col - else: - mass = False - this_engy = engy_col - - result = [] - for comb in combinations: - - subtable = self.data_table.get_subtable(comb) - - # If all counts are not 1 - if count_col and (not np.all(subtable[count_col] == 1)): - - # Remove any zero counts - subtable = subtable[subtable[count_col] != 0] - # Convert counts to ints - temp_counts = subtable[count_col].astype(int) - - energy = np.repeat((subtable[this_engy] / - subtable[count_col]), temp_counts) - species = np.repeat(subtable[spp_col], temp_counts) - else: - energy = subtable[this_engy] - species = subtable[spp_col] - - # Convert mass to energy if mass is True - if mass: - energy = (energy ** exponent) - - # Normalizing energy - if normalize: - energy = energy / np.min(energy) - result.append((comb, energy, species)) - - return result - - def sed(self, criteria, normalize=True, exponent=0.75, clean=False): - ''' - Calculates the species-level energy distribution for each given species - in the community. - - Parameters - ---------- - criteria : dict - Dictionary must have contain a key with the value 'energy' or - 'mass'. See sad method for further requirements. - normalize : bool - If True, this distribution is normalized by dividing by the lowest - energy value within each element of criteria. If False, returns raw - energy values. - exponent : float - The exponent of the allometric scaling relationship if energy is - calculated from mass - clean : bool - If False, sed dictionary contains all species. If True, species - with no individuals are removed. This is useful when subsetting. - - Returns - ------- - result : list of tuples - Each tuple contains two objects. 
The first object is a dict with - the division specifications that generated the given species energy - distributions. The second object is a dict with a keyword - corresponding to each species in the spp_list. Each species - keyword looks up a np.array that contains the given species - energy distribution. - - Note - ---- - The theta distribution from Harte (2011) is a an sed. - - ''' - spp_list, spp_col, count_col, engy_col, mass_col, combinations = \ - self.parse_criteria(criteria) - - ied = self.ied(criteria, normalize=normalize, exponent=exponent) - - result = [] - for this_ied in ied: - this_criteria_sed = {} - - for spp in spp_list: - spp_ind = (spp == this_ied[2]) - this_spp_sed = this_ied[1][spp_ind] - - if clean: # If True, don't add empty species lists - if len(this_spp_sed) > 0: - this_criteria_sed[spp] = this_spp_sed - else: - this_criteria_sed[spp] = this_spp_sed - - result.append((this_ied[0], this_criteria_sed)) - - return result - - def ased(self, criteria, normalize=True, exponent=0.75): - ''' - Calculates the average species energy distribution for each given - species in a subset. - - Parameters - ---------- - criteria : dict - Dictionary must have contain a key with the value 'energy' or - 'mass'. See sad method for further requirements. - - Returns - ------- - result : list - List of tuples containing results, where the first element is a - dictionary of criteria for this calculation and second element is a - 1D ndarray of length species containing the average energy for each - species. The third element is 1D array listing identifiers for - species in the same order as they appear in the second element of - result. - - Notes - ----- - This is equivalent to the nu distribution from Harte 2011 - - ''' - - sed = self.sed(criteria, normalize=normalize, exponent=exponent) - - result = [] - for this_sed in sed: - spp_list = list(this_sed[1].viewkeys()) - spp_list.sort() - - # Take the mean energy for each species - nu = [np.mean(this_sed[1][spp]) for spp in spp_list if - len(this_sed[1][spp]) != 0] - # Truncated spp_list if necessary - spp_list = [spp for spp in spp_list if len(this_sed[1][spp]) != 0] - - result.append((this_sed[0], np.array(nu), np.array(spp_list))) - - return result - -def flatten_sad(sad): - ''' - Takes a list of tuples, like sad output, ignores keys, and converts values - into a 2D array with each value as a column (ie, species in rows, samples - in columns. - ''' - - combs = [cmb[0] for cmb in sad] - result = np.zeros((len(sad[0][1]), len(sad))) - - for i, tup in enumerate(sad): - result[:,i] = tup[1] - - return combs, result - - -def distance(pt1, pt2): - ''' Calculate Euclidean distance between two points ''' - return np.sqrt((pt1[0] - pt2[0]) ** 2 + (pt1[1] - pt2[1]) ** 2) - - -def decdeg_distance(pt1, pt2): - ''' Calculate Earth surface distance (in km) between decimal latlong points - using Haversine approximation. 
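    As a rough illustrative check (coordinates hypothetical):
    decdeg_distance((9.1, 79.0), (9.2, 79.5)) is approximately 56 km, since
    0.1 degree of latitude spans about 11 km and 0.5 degree of longitude
    spans about 55 km at that latitude.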
- - http://stackoverflow.com/questions/15736995/how-can-i-quickly-estimate-the-distance-between-two-latitude-longitude-points - ''' - lat1, lon1 = pt1 - lat2, lon2 = pt2 - - # Convert decimal degrees to radians - lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2]) - - # haversine formula - dlon = lon2 - lon1 - dlat = lat2 - lat1 - a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2 - c = 2 * asin(sqrt(a)) - km = 6367 * c - - return km - -def divisible(dividend, precision, divisor, tol = 1e-9): - ''' - Check if dividend (here width or height of patch) is evenly divisible by - divisor (here a number of patch divs) while accounting for floating point - rounding issues. - ''' - if divisor == 0: - return False - if divisor > round(dividend / precision): - return False - - quot_raw = (dividend / precision) / divisor - quot_round = round(quot_raw) - diff = abs(quot_raw - quot_round) - - if diff < tol: - return True - else: - return False - - -def rnd(num): - ''' - Round num to number of decimal places in precision. Used to avoid issues - with floating points in the patch and subpatch width and height that make - subpatches not lie exactly on even divisions of patch. - ''' - return round(num, 6) - -def z(doubleS, halfS): - '''Calculates the z for a double S value and a half S value''' - - return np.log(doubleS / halfS) / (2 * np.log(2)) +#!/usr/bin/python + +''' +Calculating macroecological metrics for empirical or theoretical patch. Patch +is interpreted broadly as any temporally and spatially defined census. + +Classes +------- +- `Patch` -- empirical metrics for census data + +Patch Methods +------------- +- `sad` -- calculate species abundance distribution (grid or sample) +- `sar` -- calculate species-area relationship (grid or sample) +- `universal_sar` -- calculates the universal sar curve +- `ear` -- calculate endemics-area relationship (grid or sample) +- `comm` -- calculate commonality between sub-patches (grid) +- `ssad` -- calculate species-level spatial abundance distrib (grid or sample) +- `sed` -- calculate species energy distribution (grid or sample) +- `ied` -- calculate the community (individual) energy distribution +- `ased` -- calculate the average species energy distribution + +- `get_sp_centers` -- +- 'get_div_areas' -- return list of areas made by div_list + +Misc functions +-------------- +- `distance` -- return Euclidean distance between two points +''' + +from __future__ import division +import numpy as np +from math import radians, cos, sin, asin, sqrt +import itertools +from copy import deepcopy +from data import DataTable + + +class Patch: + ''' + An object representing an empirical census. + + Parameters + ---------- + data_path : str + Path to csv file containing census data. + subset : dict or str + Dictionary of permanent subset to data, {'column_name': 'condition'}, + which will limit all analysis to records in which column_name meets the + condition, ie, {'year': ('==', 2005), 'x': [('>', 20), ('<', 40)]} + restricts analysis to year 2005 and x values between 20 and 40. These + conditions can also be passed to the individual methods, but subsetting + the data table up front may save analysis time. Subsetting on a string + would look something like {'name' : [('==', 'John'), ('==', 'Harry')]}. + In addition, subset can be a query string for a SQL database. + + Attributes + ---------- + data_table : object of class DataTable + Object containing patch data and metadata. 
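    Example
    -------
    A minimal usage sketch (the csv file, subset and column names are
    hypothetical and must be described in the accompanying metadata):

    pat = Patch('census.csv', subset={'year': ('==', 2005)})
    sads = pat.sad({'spp_code': 'species', 'count': 'count', 'x': 'whole'})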
+ + ''' + + def __init__(self, datapath, subset = {}): + '''Initialize object of class Patch. See class documentation.''' + + # Handle csv + self.data_table = DataTable(datapath, subset=subset) + + # If datapath is sql or db the subsetting is already done. + if type(subset) == type({}): + self.data_table.table = self.data_table.get_subtable(subset) + + + def sad(self, criteria, clean=False): + ''' + Calculates an empirical species abundance distribution given criteria. + + Parameters + ---------- + criteria : dict + Dictionary of form {column_name: value}. Must contain a key with a + value of 'species' indicating the column with species identifiers + (this column must be type categorical in metadata). If a column + giving the counts of species found at a point is also in the data, + a key with the value 'count' should also be given. + + Value has a different meaning depending on column type: + - metric - number of divisions of data along this axis, int/float + - categorical - 'split' calculates each category separately, + 'whole' takes the entire column. + clean : bool + If True, all the zeros are removed from the sads. If False, sads + are left as is. + + Returns + ------- + result : list + List of tuples containing results, where the first element is a + dictionary of criteria for this calculation and second element is a + 1D ndarray of length species containing the abundance for each + species. The third element is 1D array listing identifiers for + species in the same order as they appear in the second element of + result. + ''' + + spp_list, spp_col, count_col, engy_col, mass, combinations = \ + self.parse_criteria(criteria) + + if spp_col == None: + raise TypeError('No species column specified in "criteria" ' + + 'parameter') + result = [] + for comb in combinations: + + subtable = self.data_table.get_subtable(comb) + + sad_list = [] + for species in spp_list: + spp_subtable = subtable[subtable[spp_col] == species] + if count_col: + count = np.sum(spp_subtable[count_col]) + else: + count = len(spp_subtable) + sad_list.append(count) + + sad_list = np.array(sad_list) + + if clean: + ind = np.where(sad_list != 0)[0] + sad_list = sad_list[ind] + temp_spp_list = spp_list[ind] + else: + temp_spp_list = spp_list + + + result.append((comb, sad_list, temp_spp_list)) + + return result + + def ssad(self, criteria): + ''' + Calculates empirical species-level spatial abundance distributions + given criteria. + + Parameters + ---------- + criteria : dict + See Patch.sad docstring + + Returns + ------- + : tuple + Returns a tuple with two objects. The first object is an array of + dicts that correspond to the criteria used to generate each cell. + The length of the first object in equal to the number of divisions + specified. The second object is a dictionary that has length + species and each keyword is a species. Each species keyword looks + up an array with the ssad for the given species. The array that + each keyword looks up is the same length as criteria. + + + ''' + sad_return = self.sad(criteria, clean=False) + spp_list = sad_return[0][2] + combs, array_res = flatten_sad(sad_return) + ssad = {} + + for i, spp in enumerate(spp_list): + ssad[spp] = array_res[i,:] + + return combs, ssad + + def parse_criteria(self, criteria): + ''' + Parses criteria list to get all possible column combinations. + + Parameters + ---------- + criteria : dict + (See docstring for Patch.sad) + energy : bool + If False, does not return an energy column, if True, returns an + energy column. 
+ + Returns + ------- + spp_list : ndarray + 1D array listing identifiers for species in the same order as they + appear in arrays found in result. + spp_col : str + Name of column containing species identifiers. + count_col : str + Name of column containing counts, if any. + combinations : list of dicts + List of dictionaries giving all possible combinations of criteria. + Columns not mentioned in criteria are ignored and will be averaged + over in later analyses. + + ''' + + spp_list = None + spp_col = None + count_col = None + engy_col = None + mass_col = None + combinations = [] + + # Calculate all possible combinations of columns based on criteria + # TODO: Add error checking + for key, value in criteria.items(): + + # Look for two special values indicating species and count cols + if value == 'species': + spp_list = np.unique(self.data_table.table[key]) + spp_col = key + continue + if value == 'count': + count_col = key + continue + if value == 'energy': + engy_col = key + continue + if value == 'mass': + mass_col = key + continue + + # Get levels of categorial or metric data + if value == 'split': # Categorial + levels = np.unique(self.data_table.table[key]) + levels_str = [('==' , x.astype(levels.dtype)) for x in levels] + elif value == 'whole': + # Random string to minimize chance of overlap? + levels_str = [('==','whole')] + else: # Metric + + # TODO: Throw a warning if the data is not divisible by the + # divisions specified. + try: + dmin = self.data_table.meta[(key, 'minimum')] + dmax = self.data_table.meta[(key, 'maximum')] + dprec = self.data_table.meta[(key, 'precision')] + + # TODO: Error if step < prec + step = (dmax + dprec - dmin) / value + starts = np.arange(dmin, dmax + dprec, step) + ends = starts + step + except TypeError: + raise TypeError('Unable to proceed to with values ' + + 'obtained from metadata. Please check ' + + 'the metadata file and/or parameters file') + + + starts_str = [('>=', x) for x in starts] + ends_str = [('<', x) for x in ends] + levels_str = [list(lvl) for lvl in zip(starts_str, ends_str)] + + + # Add these levels to combinations dictionary + if len(combinations) == 0: # If first criteria + for i, level in enumerate(levels_str): + combinations.append({key: level}) + else: + temp_comb = [] + for i, level in enumerate(levels_str): + exist_recs = deepcopy(combinations) + for rec in exist_recs: + rec[key] = level + temp_comb += exist_recs + combinations = temp_comb + + if len(combinations) == 0: + combinations.append({}) + + return spp_list, spp_col, count_col, engy_col, mass_col, combinations + + + + def sar(self, div_cols, div_list, criteria, form='sar', output_N=False): + ''' + Calculate an empirical species-area relationship given criteria. + + Parameters + ---------- + div_cols : tuple + Column names to divide, eg, ('x', 'y'). Must be metric. + div_list : list of tuples + List of division pairs in same order as div_cols, eg, [(2,2), + (2,4), (4,4)]. Values are number of divisions of div_col. + criteria : dict + See docstring for EPatch.sad. Here, criteria SHOULD NOT include + items referring to div_cols (if there are any, they are ignored). + form : string + 'sar' or 'ear' for species or endemics area relationship. EAR is + relative to the subtable selected after criteria is applied. + output_N : bool + Adds the column N to the output rec array which contains the + average N for a given area. 
+ + Returns + ------- + rec_sar: structured array + Returns a structured array with fields 'items' and 'area' that + contains the average items/species for each given area specified by + critieria. + full_result : list of ndarrays + List of same length as areas containing arrays with element for + count of species or endemics in each subpatch at corresponding + area. + ''' + + # If any element in div_cols in criteria, remove from criteria + criteria = {k: v for k, v in criteria.items() if k not in div_cols} + + # Loop through div combinations (ie, areas), calc sad, and summarize + areas = [] + mean_result = [] + full_result = [] + N_result = [] + + for div in div_list: + + # Add divs to criteria dict + this_criteria = deepcopy(criteria) + for i, col in enumerate(div_cols): + this_criteria[col] = div[i] + + # Get flattened sad for all criteria and this div + sad_return = self.sad(this_criteria) + + if output_N: + N_result.append(np.mean([sum(sad[1]) for sad in sad_return])) + + flat_sad = flatten_sad(sad_return)[1] + + # Store results + if form == 'sar': + this_full = np.sum((flat_sad > 0), axis=0) + this_mean = np.mean(this_full) + elif form == 'ear': + totcnt = np.sum(flat_sad, axis=1) + totcnt_arr = \ + np.array([list(totcnt),]*np.shape(flat_sad)[1]).transpose() + + this_full = np.sum(np.equal(flat_sad, totcnt_arr), axis=0) + this_mean = np.mean(this_full) + else: + raise NotImplementedError('No SAR of form %s available' % form) + + full_result.append(this_full) + mean_result.append(this_mean) + + # Store area + area = 1 + for i, col in enumerate(div_cols): + dmin = self.data_table.meta[(col, 'minimum')] + dmax = self.data_table.meta[(col, 'maximum')] + dprec = self.data_table.meta[(col, 'precision')] + length = (dmax + dprec - dmin) + + area *= length / div[i] + + areas.append(area) + + # Return + if not output_N: + rec_sar = np.array(zip(mean_result, areas), dtype=[('items', + np.float), ('area', np.float)]) + else: + rec_sar = np.array(zip(mean_result, N_result, areas), + dtype=[('items', np.float), ('N', np.float), ('area', np.float)]) + + return rec_sar, full_result + + + def universal_sar(self, div_cols, div_list, criteria, include_full=False): + ''' + Calculates the empirical universal sar given criteria. The universal + sar calculates the slope of the SAR and the ratio of N / S at all + the areas in div_cols (where N is the total number of species and S is + the total number of species). + + This function assumes that the div_list contains halvings. If they are not, + the function will still work but the results will be meaningless. An + example a of div_list with halvings is: + + [(1,1), (1,2), (2,2), (2,4), (4,4)] + + Parameters + ---------- + div_cols : tuple + Column names to divide, eg, ('x', 'y'). Must be metric. + div_list : list of tuples + List of division pairs in same order as div_cols, eg, [(2,2), + (2,4), (4,4)]. Values are number of divisions of div_col. + criteria : dict + See docstring for EPatch.sad. Here, criteria SHOULD NOT include + items referring to div_cols (if there are any, they are ignored). + include_full : bool + If include_full = True, the division (1,1) will be included if it + was now already included. Else it will not be included. 
(1,1) is + equivalent to the full plot + + + Returns + ------- + z_array : a structured array + Has the columns names: + 'z' : slope of the SAR at the given area + 'S' : Number of species at the given division + 'N' : Number of individuals at the given division + 'N/S' : The ratio of N/S at the given division + + + Notes + ----- + If you give it n divisions in div_list you will get a structured array + back that has length n - 2. Therefore, if you only have one + ''' + + # If (1,1) is not included, include it + if include_full: + try: + div_list.index((1,1)) + except ValueError: + div_list.insert(0, (1,1)) + + # Run sar with the div_cols + sar = self.sar(div_cols, div_list, criteria, output_N=True)[0] + + # sort by area + sar = np.sort(sar, order=['area'])[::-1] + + # Calculate z's + if len(sar) >= 3: # Check the length of sar + z_list = [z(sar['items'][i - 1], sar['items'][i + 1]) for i in + np.arange(1, len(sar)) if sar['items'][i] != sar['items'][-1]] + else: + return np.empty(0, dtype=[('z', np.float), ('S', np.float), ('N', + np.float), ('N/S', np.float)]) + + N_over_S = sar['N'][1:len(sar) - 1] / sar['items'][1:len(sar) - 1] + + z_array = np.array(zip(z_list, sar['items'][1:len(sar) - 1], + sar['N'][1:len(sar) - 1], N_over_S), dtype=[('z', np.float), ('S', + np.float), ('N', np.float), ('N/S', np.float)]) + + return z_array + + def comm_sep(self, plot_locs, criteria, loc_unit=None): + ''' + Calculates commonality (Sorensen and Jaccard) between pairs of plots. + + Parameters + ---------- + plot_locs : dict + Dictionary with keys equal to each plot name, which must be + represented by a column in the data table, and values equal to a + tuple of the x and y coordinate of each plot + criteria : dict + See docstring for Patch.sad. + loc_unit : str + Unit of plot locations. Special cases include 'decdeg' (decimal + degrees), returns result in km. Otherwise ignored. + + Returns + ------- + result: structured array + Returns a structured array with fields plot-a and plot-b (names of + two plots), dist (distance between plots), and sorensen and jaccard + (similarity indices). Has row for each unique pair of plots. 
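        Notes
        -----
        For two plots with species sets A and B, Sorensen similarity is
        2 * |A intersect B| / (|A| + |B|) and Jaccard similarity is
        |A intersect B| / |A union B|. A brief usage sketch (plot names,
        coordinates and column names are hypothetical; each plot must be a
        count column in the data table):

        comm = pat.comm_sep({'plot1': (0, 0), 'plot2': (0, 1)},
                            {'spp_code': 'species', 'count': 'count'})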
+ ''' + + # Set up sad_dict with key=plot and val=clean sad for that plot + sad_dict = {} + + # Loop through all plot cols, updating criteria, and getting spp_list + for plot in plot_locs.keys(): + + # Find current count col and remove it from criteria + for crit_key in criteria.keys(): + if criteria[crit_key] == 'count': + criteria.pop(crit_key, None) + + # Add this plot as col with counts + criteria[plot] = 'count' + + # Get SAD for existing criteria with this plot as count col + sad_return = self.sad(criteria, clean=True) + + # Check that sad_return only has one element, or throw error + if len(sad_return) > 1: + raise NotImplementedError('Too many criteria for comm_sep') + + # Get unique species list for this plot and store in sad_dict + sad_dict[plot] = sad_return[0][2] + + # Set up recarray to hold Sorensen index for all pairs of plots + n_pairs = np.sum(np.arange(len(plot_locs.keys()))) + result = np.recarray((n_pairs,), dtype=[('plot-a','S32'), + ('plot-b', 'S32'), + ('spp-a', int), + ('spp-b', int), + ('dist', float), + ('sorensen', float), + ('jaccard', float)]) + + # Loop through all combinations of plots and fill in result table + row = 0 + for pair in itertools.combinations(plot_locs.keys(), 2): + + # Names of plots + plota = pair[0] + plotb = pair[1] + + result[row]['plot-a'] = plota + result[row]['plot-b'] = plotb + + # Calculate inter-plot distance + if loc_unit == 'decdeg': + result[row]['dist'] = decdeg_distance(plot_locs[plota], + plot_locs[plotb]) + else: + result[row]['dist'] = distance(plot_locs[plota], + plot_locs[plotb]) + + # Get similarity indices + spp_a = len(sad_dict[plota]) + spp_b = len(sad_dict[plotb]) + + result[row]['spp-a'] = spp_a + result[row]['spp-b'] = spp_b + + intersect = set(sad_dict[plota]).intersection(sad_dict[plotb]) + union = set(sad_dict[plota]).union(sad_dict[plotb]) + + # Fill in zero if denom is zero + if spp_a + spp_b == 0: + result[row]['sorensen'] = 0 + else: + result[row]['sorensen'] = (2*len(intersect)) / (spp_a+spp_b) + + if len(union) == 0: + result[row]['jaccard'] = 0 + else: + result[row]['jaccard'] = len(intersect) / len(union) + + # Increment row counter + row += 1 + + return result + + + def ied(self, criteria, normalize=True, exponent=0.75): + ''' + Calculates the individual energy distribution for the entire community + given the criteria + + Parameters + ---------- + criteria : dict + Dictionary must have contain a key with the value 'energy'. See + sad method for further requirements. + normalize : bool + If True, this distribution is normalized by dividing by the lowest + energy value within each element of criteria. If False, returns raw + energy values. + exponent : float + The exponent of the allometric scaling relationship if energy is + calculated from mass. + + Returns + ------- + result : list + List of tuples containing results, where first element is + dictionary of criteria for this calculation and second element is a + 1D ndarray containing the energy measurement of each individual in + the subset. The third element is the full (not unique) species + list for the given criteria. + + Notes + ----- + If count_col is None or is all ones, the entire energy column for each + subtable is returned. Else, the average energy per individual, + repeated for each individual is returned. This is equivalent to the psi + distribution from Harte (2011). 
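        A small runnable sketch of the count expansion described above
        (values are hypothetical):

        import numpy as np
        counts = np.array([3, 1])
        energy = np.array([30.0, 5.0])
        per_ind = np.repeat(energy / counts, counts)  # [10., 10., 10., 5.]
        per_ind = per_ind / per_ind.min()             # normalized: [2., 2., 2., 1.]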
+ + + ''' + + spp_list, spp_col, count_col, engy_col, mass_col, combinations = \ + self.parse_criteria(criteria) + + if engy_col == None and mass_col == None: + raise ValueError("No energy or mass column given") + elif engy_col == None and mass_col != None: + mass = True + this_engy = mass_col + else: + mass = False + this_engy = engy_col + + result = [] + for comb in combinations: + + subtable = self.data_table.get_subtable(comb) + + # If all counts are not 1 + if count_col and (not np.all(subtable[count_col] == 1)): + + # Remove any zero counts + subtable = subtable[subtable[count_col] != 0] + # Convert counts to ints + temp_counts = subtable[count_col].astype(int) + + energy = np.repeat((subtable[this_engy] / + subtable[count_col]), temp_counts) + species = np.repeat(subtable[spp_col], temp_counts) + else: + energy = subtable[this_engy] + species = subtable[spp_col] + + # Convert mass to energy if mass is True + if mass: + energy = (energy ** exponent) + + # Normalizing energy + if normalize: + energy = energy / np.min(energy) + result.append((comb, energy, species)) + + return result + + def sed(self, criteria, normalize=True, exponent=0.75, clean=False): + ''' + Calculates the species-level energy distribution for each given species + in the community. + + Parameters + ---------- + criteria : dict + Dictionary must have contain a key with the value 'energy' or + 'mass'. See sad method for further requirements. + normalize : bool + If True, this distribution is normalized by dividing by the lowest + energy value within each element of criteria. If False, returns raw + energy values. + exponent : float + The exponent of the allometric scaling relationship if energy is + calculated from mass + clean : bool + If False, sed dictionary contains all species. If True, species + with no individuals are removed. This is useful when subsetting. + + Returns + ------- + result : list of tuples + Each tuple contains two objects. The first object is a dict with + the division specifications that generated the given species energy + distributions. The second object is a dict with a keyword + corresponding to each species in the spp_list. Each species + keyword looks up a np.array that contains the given species + energy distribution. + + Note + ---- + The theta distribution from Harte (2011) is a an sed. + + ''' + spp_list, spp_col, count_col, engy_col, mass_col, combinations = \ + self.parse_criteria(criteria) + + ied = self.ied(criteria, normalize=normalize, exponent=exponent) + + result = [] + for this_ied in ied: + this_criteria_sed = {} + + for spp in spp_list: + spp_ind = (spp == this_ied[2]) + this_spp_sed = this_ied[1][spp_ind] + + if clean: # If True, don't add empty species lists + if len(this_spp_sed) > 0: + this_criteria_sed[spp] = this_spp_sed + else: + this_criteria_sed[spp] = this_spp_sed + + result.append((this_ied[0], this_criteria_sed)) + + return result + + def ased(self, criteria, normalize=True, exponent=0.75): + ''' + Calculates the average species energy distribution for each given + species in a subset. + + Parameters + ---------- + criteria : dict + Dictionary must have contain a key with the value 'energy' or + 'mass'. See sad method for further requirements. + + Returns + ------- + result : list + List of tuples containing results, where the first element is a + dictionary of criteria for this calculation and second element is a + 1D ndarray of length species containing the average energy for each + species. 
The third element is 1D array listing identifiers for + species in the same order as they appear in the second element of + result. + + Notes + ----- + This is equivalent to the nu distribution from Harte 2011 + + ''' + + sed = self.sed(criteria, normalize=normalize, exponent=exponent) + + result = [] + for this_sed in sed: + spp_list = list(this_sed[1].viewkeys()) + spp_list.sort() + + # Take the mean energy for each species + nu = [np.mean(this_sed[1][spp]) for spp in spp_list if + len(this_sed[1][spp]) != 0] + # Truncated spp_list if necessary + spp_list = [spp for spp in spp_list if len(this_sed[1][spp]) != 0] + + result.append((this_sed[0], np.array(nu), np.array(spp_list))) + + return result + +def flatten_sad(sad): + ''' + Takes a list of tuples, like sad output, ignores keys, and converts values + into a 2D array with each value as a column (ie, species in rows, samples + in columns. + ''' + + combs = [cmb[0] for cmb in sad] + result = np.zeros((len(sad[0][1]), len(sad))) + + for i, tup in enumerate(sad): + result[:,i] = tup[1] + + return combs, result + + +def distance(pt1, pt2): + ''' Calculate Euclidean distance between two points ''' + return np.sqrt((pt1[0] - pt2[0]) ** 2 + (pt1[1] - pt2[1]) ** 2) + + +def decdeg_distance(pt1, pt2): + ''' Calculate Earth surface distance (in km) between decimal latlong points + using Haversine approximation. + + http://stackoverflow.com/questions/15736995/how-can-i-quickly-estimate-the-distance-between-two-latitude-longitude-points + ''' + lat1, lon1 = pt1 + lat2, lon2 = pt2 + + # Convert decimal degrees to radians + lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2]) + + # haversine formula + dlon = lon2 - lon1 + dlat = lat2 - lat1 + a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2 + c = 2 * asin(sqrt(a)) + km = 6367 * c + + return km + +def divisible(dividend, precision, divisor, tol = 1e-9): + ''' + Check if dividend (here width or height of patch) is evenly divisible by + divisor (here a number of patch divs) while accounting for floating point + rounding issues. + ''' + if divisor == 0: + return False + if divisor > round(dividend / precision): + return False + + quot_raw = (dividend / precision) / divisor + quot_round = round(quot_raw) + diff = abs(quot_raw - quot_round) + + if diff < tol: + return True + else: + return False + + +def rnd(num): + ''' + Round num to number of decimal places in precision. Used to avoid issues + with floating points in the patch and subpatch width and height that make + subpatches not lie exactly on even divisions of patch. + ''' + return round(num, 6) + +def z(doubleS, halfS): + '''Calculates the z for a double S value and a half S value''' + + return np.log(doubleS / halfS) / (2 * np.log(2)) From a1506c86360d42fcc4830523454554ca2bc62157 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 22 Jan 2014 15:35:54 -0800 Subject: [PATCH 014/343] Fix line wrap --- empirical.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/empirical.py b/empirical.py index 63e2b44..bf1d5e6 100644 --- a/empirical.py +++ b/empirical.py @@ -380,9 +380,9 @@ def universal_sar(self, div_cols, div_list, criteria, include_full=False): the areas in div_cols (where N is the total number of species and S is the total number of species). - This function assumes that the div_list contains halvings. If they are not, - the function will still work but the results will be meaningless. An - example a of div_list with halvings is: + This function assumes that the div_list contains halvings. 
If they are + not, the function will still work but the results will be meaningless. + An example a of div_list with halvings is: [(1,1), (1,2), (2,2), (2,4), (4,4)] From 8031b2a38e693d5e74973b2d5955f5fc26a95af2 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 22 Jan 2014 16:50:50 -0800 Subject: [PATCH 015/343] Move parse method of Patch up below init --- empirical.py | 198 +++++++++++++++++++++++++-------------------------- 1 file changed, 99 insertions(+), 99 deletions(-) diff --git a/empirical.py b/empirical.py index bf1d5e6..9988f4b 100644 --- a/empirical.py +++ b/empirical.py @@ -72,105 +72,6 @@ def __init__(self, datapath, subset = {}): self.data_table.table = self.data_table.get_subtable(subset) - def sad(self, criteria, clean=False): - ''' - Calculates an empirical species abundance distribution given criteria. - - Parameters - ---------- - criteria : dict - Dictionary of form {column_name: value}. Must contain a key with a - value of 'species' indicating the column with species identifiers - (this column must be type categorical in metadata). If a column - giving the counts of species found at a point is also in the data, - a key with the value 'count' should also be given. - - Value has a different meaning depending on column type: - - metric - number of divisions of data along this axis, int/float - - categorical - 'split' calculates each category separately, - 'whole' takes the entire column. - clean : bool - If True, all the zeros are removed from the sads. If False, sads - are left as is. - - Returns - ------- - result : list - List of tuples containing results, where the first element is a - dictionary of criteria for this calculation and second element is a - 1D ndarray of length species containing the abundance for each - species. The third element is 1D array listing identifiers for - species in the same order as they appear in the second element of - result. - ''' - - spp_list, spp_col, count_col, engy_col, mass, combinations = \ - self.parse_criteria(criteria) - - if spp_col == None: - raise TypeError('No species column specified in "criteria" ' + - 'parameter') - result = [] - for comb in combinations: - - subtable = self.data_table.get_subtable(comb) - - sad_list = [] - for species in spp_list: - spp_subtable = subtable[subtable[spp_col] == species] - if count_col: - count = np.sum(spp_subtable[count_col]) - else: - count = len(spp_subtable) - sad_list.append(count) - - sad_list = np.array(sad_list) - - if clean: - ind = np.where(sad_list != 0)[0] - sad_list = sad_list[ind] - temp_spp_list = spp_list[ind] - else: - temp_spp_list = spp_list - - - result.append((comb, sad_list, temp_spp_list)) - - return result - - def ssad(self, criteria): - ''' - Calculates empirical species-level spatial abundance distributions - given criteria. - - Parameters - ---------- - criteria : dict - See Patch.sad docstring - - Returns - ------- - : tuple - Returns a tuple with two objects. The first object is an array of - dicts that correspond to the criteria used to generate each cell. - The length of the first object in equal to the number of divisions - specified. The second object is a dictionary that has length - species and each keyword is a species. Each species keyword looks - up an array with the ssad for the given species. The array that - each keyword looks up is the same length as criteria. 
- - - ''' - sad_return = self.sad(criteria, clean=False) - spp_list = sad_return[0][2] - combs, array_res = flatten_sad(sad_return) - ssad = {} - - for i, spp in enumerate(spp_list): - ssad[spp] = array_res[i,:] - - return combs, ssad - def parse_criteria(self, criteria): ''' Parses criteria list to get all possible column combinations. @@ -275,6 +176,105 @@ def parse_criteria(self, criteria): return spp_list, spp_col, count_col, engy_col, mass_col, combinations + def sad(self, criteria, clean=False): + ''' + Calculates an empirical species abundance distribution given criteria. + + Parameters + ---------- + criteria : dict + Dictionary of form {column_name: value}. Must contain a key with a + value of 'species' indicating the column with species identifiers + (this column must be type categorical in metadata). If a column + giving the counts of species found at a point is also in the data, + a key with the value 'count' should also be given. + + Value has a different meaning depending on column type: + - metric - number of divisions of data along this axis, int/float + - categorical - 'split' calculates each category separately, + 'whole' takes the entire column. + clean : bool + If True, all the zeros are removed from the sads. If False, sads + are left as is. + + Returns + ------- + result : list + List of tuples containing results, where the first element is a + dictionary of criteria for this calculation and second element is a + 1D ndarray of length species containing the abundance for each + species. The third element is 1D array listing identifiers for + species in the same order as they appear in the second element of + result. + ''' + + spp_list, spp_col, count_col, engy_col, mass, combinations = \ + self.parse_criteria(criteria) + + if spp_col == None: + raise TypeError('No species column specified in "criteria" ' + + 'parameter') + result = [] + for comb in combinations: + + subtable = self.data_table.get_subtable(comb) + + sad_list = [] + for species in spp_list: + spp_subtable = subtable[subtable[spp_col] == species] + if count_col: + count = np.sum(spp_subtable[count_col]) + else: + count = len(spp_subtable) + sad_list.append(count) + + sad_list = np.array(sad_list) + + if clean: + ind = np.where(sad_list != 0)[0] + sad_list = sad_list[ind] + temp_spp_list = spp_list[ind] + else: + temp_spp_list = spp_list + + + result.append((comb, sad_list, temp_spp_list)) + + return result + + def ssad(self, criteria): + ''' + Calculates empirical species-level spatial abundance distributions + given criteria. + + Parameters + ---------- + criteria : dict + See Patch.sad docstring + + Returns + ------- + : tuple + Returns a tuple with two objects. The first object is an array of + dicts that correspond to the criteria used to generate each cell. + The length of the first object in equal to the number of divisions + specified. The second object is a dictionary that has length + species and each keyword is a species. Each species keyword looks + up an array with the ssad for the given species. The array that + each keyword looks up is the same length as criteria. 
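        Example
        -------
        An illustrative sketch (column names and divisions hypothetical):

        combs, ssads = pat.ssad({'spp_code': 'species', 'count': 'count',
                                 'x': 2, 'y': 2})
        # ssads['some_spp'] is an array with one abundance per cell, ordered
        # the same way as the criteria combinations listed in combs.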
+ + + ''' + sad_return = self.sad(criteria, clean=False) + spp_list = sad_return[0][2] + combs, array_res = flatten_sad(sad_return) + ssad = {} + + for i, spp in enumerate(spp_list): + ssad[spp] = array_res[i,:] + + return combs, ssad + def sar(self, div_cols, div_list, criteria, form='sar', output_N=False): ''' From 30f5ca602e20a43441f8a0f6603fad19b1243fa5 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 23 Jan 2014 15:38:14 -0800 Subject: [PATCH 016/343] Fix whitespace in test_empirical --- test_empirical.py | 57 ++++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/test_empirical.py b/test_empirical.py index 281cda8..5fd53f7 100644 --- a/test_empirical.py +++ b/test_empirical.py @@ -33,7 +33,7 @@ def setUp(self): self.pat1 = Patch('xyfile5.csv') # Line below sets metadata manually-no metadata file loaded - self.pat1.data_table.meta = self.xymeta5 + self.pat1.data_table.meta = self.xymeta5 self.xyfile6 = open('xyfile6.csv', 'w') self.xyfile6.write('''spp_code, x, y, count @@ -146,7 +146,7 @@ def setUp(self): 'precision'): None, ('count', 'type'): 'ratio'} self.pat5 = Patch('xyfile9.csv') - self.pat5.data_table.meta = self.xymeta9 + self.pat5.data_table.meta = self.xymeta9 self.xyfile10 = open('xyfile10.csv', 'w') self.xyfile10.write('''spp_code, x, y, count a, 0, 0, 1 @@ -291,14 +291,14 @@ def test_patch_init(self): # Testing that metadata was set correctly self.assertTrue(self.pat1.data_table.meta[('x', 'maximum')] == .2) - + def test_sad(self): - + # Test correct result with 'whole' and one division - sad = self.pat1.sad({'spp_code': 'species', 'count': 'count', + sad = self.pat1.sad({'spp_code': 'species', 'count': 'count', 'x': 1}) self.assertTrue(np.array_equal(sad[0][1], np.array([4,2]))) - sad = self.pat1.sad({'spp_code': 'species', 'count': 'count', + sad = self.pat1.sad({'spp_code': 'species', 'count': 'count', 'x': 'whole'}) self.assertTrue(np.array_equal(sad[0][1], np.array([4,2]))) sad = self.pat4.sad({'spp_code': 'species', 'count' :'count', 'x': 1}) @@ -337,14 +337,14 @@ def test_sad(self): def test_parse_criteria(self): - # Checking parse returns what we would expect + # Checking parse returns what we would expect pars = self.pat4.parse_criteria({'spp_code': 'species', 'count': 'count', 'x': 1}) self.assertTrue(pars[1] == 'spp_code') self.assertTrue(pars[2] == 'count') # Test that energy, mass and count col are None - pars = self.pat4.parse_criteria({'spp_code': 'species', + pars = self.pat4.parse_criteria({'spp_code': 'species', 'y': 'whole'}) self.assertTrue((pars[2] == None) and (pars[3] == None) and (pars[4] == None)) @@ -355,7 +355,7 @@ def test_parse_criteria(self): # Make sure if count is not passed, no error is thrown self.pat3.parse_criteria({'spp_code': 'species'}) - # Check energy and mass returns + # Check energy and mass returns pars = self.pat5.parse_criteria({'spp_code': 'species', 'count': 'count', 'energy': 'energy'}) @@ -370,7 +370,7 @@ def test_parse_criteria(self): # TODO: Test that error is thrown if step < prec def test_sar(self): - + # Checking that sar function returns correct S0 for full plot sar = self.pat3.sar(('x', 'y'), [(1,1)], {'spp_code': 'species', 'count': 'count'}) @@ -388,7 +388,7 @@ def test_sar(self): ear = self.pat3.sar(('x', 'y'), [(1,1), (2,2)], {'spp_code': 'species', 'count': 'count'}, form='ear') self.assertTrue(np.array_equal(ear[1][1], np.array([0,1,0,0]))) - + # Test that returned areas are correct sar = self.pat1.sar(('x', 'y'), [(1,1)], {'spp_code': 'species', 
'count': 'count'}) @@ -403,31 +403,32 @@ def test_universal_sar(self): vals = self.pat8.universal_sar(div_cols, [(1,1), (1,2), (2,2), (2,4), (4,4)], criteria) self.assertTrue(len(vals) == 3) - + # If (1,1) is not passed in it should have a length of zero vals = self.pat8.universal_sar(div_cols, [(1,2), (2,2)], criteria) self.assertTrue(len(vals) == 0) # If (1,1) is not passed in but include_full == True should have len - # equal to 1 - vals = self.pat8.universal_sar(div_cols, [(1,2), (2,2), (2,4)], criteria, + # equal to 1 + vals = self.pat8.universal_sar(div_cols, [(1,2), (2,2), (2,4)], + criteria, include_full=True) self.assertTrue(len(vals) == 2) # Test that I get the correct z-value back - vals = self.pat8.universal_sar(div_cols, [(1,1), (1,2), (2,2)], + vals = self.pat8.universal_sar(div_cols, [(1,1), (1,2), (2,2)], criteria) self.assertTrue(np.round(vals['z'][0], decimals=4) == 0.3390) # If I pass in something other than a halving I should still get # something back - vals = self.pat8.universal_sar(div_cols, [(1,1), (2,2), (2,4), (4,4)], + vals = self.pat8.universal_sar(div_cols, [(1,1), (2,2), (2,4), (4,4)], criteria) self.assertTrue(len(vals) == 2) def test_comm_sep(self): - # Create result recarray + # Create result recarray comm = self.pat9.comm_sep({'plot1': (0,0), 'plot2': (0,1), 'plot3': (3,4)}, {'spp_code': 'species', 'count': 'count'}) @@ -435,16 +436,16 @@ def test_comm_sep(self): # Create result recarray with dec degree locs comm_decdeg = self.pat9.comm_sep({'plot1': (9.1,79.0), 'plot2': (9.2,79.5), 'plot3': (12.7,50)}, - {'spp_code': 'species', 'count': 'count'}, + {'spp_code': 'species', 'count': 'count'}, loc_unit='decdeg') # Check distances dist_sort = np.sort(comm['dist']) - np.testing.assert_array_almost_equal(dist_sort, np.array((1,4.242,5)), + np.testing.assert_array_almost_equal(dist_sort, np.array((1,4.242,5)), 3) # Check distances dec degree - # TODO: Find exact third party comparison formula - formulas online use + # TODO: Find exact third party comparison formula - formulas online use # different radii, etc. 
and give approx same answer dist_sort = np.sort(comm_decdeg['dist']) #np.testing.assert_array_almost_equal(dist_sort, @@ -457,7 +458,7 @@ def test_comm_sep(self): # Check Sorensen - 2 zeros from empty plot1 sor_sort = np.sort(comm['sorensen']) - np.testing.assert_array_almost_equal(sor_sort, + np.testing.assert_array_almost_equal(sor_sort, np.array((0,0,0.571428571)), 5) # Check Jaccard - 2 zeros from empty plot1 @@ -465,13 +466,13 @@ def test_comm_sep(self): np.testing.assert_array_almost_equal(jac_sort, np.array((0,0,0.4)), 5) def test_ssad(self): - + # Check that ssad does not lose any individuals ssad = self.pat2.ssad({'spp_code': 'species', 'count': 'count'}) sad = self.pat2.sad({'spp_code': 'species', 'count': 'count'}) sum_ssad = np.array([sum(val) for val in ssad[1].itervalues()]) self.assertTrue(sum(sad[0][1]) == sum(sum_ssad)) - + ssad = self.pat6.ssad({'spp_code': 'species', 'count': 'count'}) sad = self.pat6.sad({'spp_code': 'species', 'count': 'count'}) sum_ssad = np.array([sum(val) for val in ssad[1].itervalues()]) @@ -491,21 +492,21 @@ def test_ssad(self): self.assertTrue(set(ssad[1]['b']) == {1, 4, 0, 1}) self.assertTrue(set(ssad[1]['c']) == {0, 0, 3, 3}) self.assertTrue(set(ssad[1]['d']) == {3, 1, 1, 1}) - + def test_ied(self): - + # Test correct length of result eng = self.pat5.ied({'spp_code': 'species', 'count': 'count', 'energy': 'energy'}) self.assertTrue(len(eng[0][1]) == 6) # Test error if energy column is missing - self.assertRaises(ValueError, self.pat5.ied, + self.assertRaises(ValueError, self.pat5.ied, {'spp_code': 'species', 'count': 'count'}) # Test normalize is working eng = self.pat5.ied({'spp_code': 'species', 'count': 'count', - 'energy': 'energy', 'x': 2}) + 'energy': 'energy', 'x': 2}) self.assertTrue(np.array_equal(eng[1][1], np.array([1]))) self.assertTrue(len(eng[0][1]) == 5) @@ -515,7 +516,7 @@ def test_ied(self): self.assertTrue(np.array_equal(eng[0][1], np.array([17,17,12,23,45, 110]))) - # Test that energy overrides mass + # Test that energy overrides mass eng = self.pat5.ied({'spp_code': 'species', 'count': 'count', 'mass' : 'mass', 'energy' : 'energy'}, normalize=False) self.assertTrue(np.array_equal(eng[0][1], np.array([.5,.5,2,3,4,5]))) From e8149d0519713b667c67e67cb89c63a3f4b2a706 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 24 Jan 2014 11:32:20 -0800 Subject: [PATCH 017/343] Clarify meaning of spp_list in parse_criteria --- empirical.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/empirical.py b/empirical.py index 9988f4b..0a4f0f8 100644 --- a/empirical.py +++ b/empirical.py @@ -88,7 +88,9 @@ def parse_criteria(self, criteria): ------- spp_list : ndarray 1D array listing identifiers for species in the same order as they - appear in arrays found in result. + appear in arrays found in result. Contains all species in table, + although all species may not appear in subtables that are defined + by combinations. spp_col : str Name of column containing species identifiers. 
count_col : str From 117d625992dc510d413cf5f784b7617c0e422aad Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 24 Jan 2014 11:32:54 -0800 Subject: [PATCH 018/343] Partial pair_dist method using pdist for low abundance species --- empirical.py | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/empirical.py b/empirical.py index 0a4f0f8..f9183d1 100644 --- a/empirical.py +++ b/empirical.py @@ -34,6 +34,7 @@ import itertools from copy import deepcopy from data import DataTable +import scipy.spatial.distance as dist class Patch: @@ -552,6 +553,91 @@ def comm_sep(self, plot_locs, criteria, loc_unit=None): return result + def pair_dist(self, div_cols, bin_edges, criteria, edge_correct=True, + n0_min_max=None): + """ + Calculates pairwise distances between individuals of a species. + + Parameters + ---------- + div_cols : tuple + Column names containing x and y coordinates of individuals + bin_edges : iterable + List of edges of distance classes to bin histogram of distances + criteria : dict + See docstring for Patch.sad. + edge_correct : bool + Correct histograms by replacing count of individuals at distance + bin with expected count if entire ring at that distance was + available (part of ring may fall outside of plot). Default True. + n0_min_max : tuple + Optional min and max abundance for species to consider. Useful for + ignoring rare species with few samples and abundant species for + which calculation would take a long time. + + Returns + ------- + result : tuple + Tuple with two elements. First is list of combinations used to + generate result. Second is another tuple with first element giving + list of species and second element giving list of histograms of + pairwise distances for that species. + + """ + + spp_list, spp_col, count_col, engy_col, mass, combinations = \ + self.parse_criteria(criteria) + + result = [] + for comb in combinations: + + # Get appropriate subtable for this combination + subtable = self.data_table.get_subtable(comb) + + # Loop all species + for spp in spp_list: + + spp_subtable = subtable[subtable[spp_col] == spp] + + # Get n0, accounting for count col + if count_col: + count = np.sum(spp_subtable[count_col]) + else: + count = len(spp_subtable) + + # Skip this spp if no min max or n0 outside of range + if n0_min_max and (count < n0_min_max[0] or count > + n0_min_max[1]): + continue + + # Get list of all points + x = spp_subtable[div_cols[0]] + y = spp_subtable[div_cols[1]] + all_points = zip(x,y) + + # If n0 < 1e5, get all pairwise distances at once + if count < 1e5: + all_dist = dist.pdist(all_points) + hist, _ = np.histogram(all_points, bin_edges) + + # If n0 > 1e5, loop individuals (all dist too large) + # TODO: Write unit test to test this + else: + hist = np.array(len(bin_edges) - 1) + for i, point in enumerate(all_points()): + + # Skip current index + this_point = all_points[i] + all_other_points = all_points[:i] + all_points[i+1:] + + # Get dist from this point to all other points + other_dist = dist.cdist(this_point, all_other_points) + hist += other_dist + + result.append(hist) + + return result + def ied(self, criteria, normalize=True, exponent=0.75): ''' @@ -580,7 +666,7 @@ def ied(self, criteria, normalize=True, exponent=0.75): the subset. The third element is the full (not unique) species list for the given criteria. - Notes + lNotes ----- If count_col is None or is all ones, the entire energy column for each subtable is returned. 
Else, the average energy per individual, From 60ad16bd3e525391c3c5937d073ff77095afb78f Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Mon, 10 Feb 2014 20:43:56 -0800 Subject: [PATCH 019/343] Fixed brentq rounding error in tgeo --- distributions.py | 164 +++++++++++++++++++++++++++++++++++++----- test_distributions.py | 35 ++++++++- 2 files changed, 182 insertions(+), 17 deletions(-) diff --git a/distributions.py b/distributions.py index 39944eb..005d428 100644 --- a/distributions.py +++ b/distributions.py @@ -1319,6 +1319,93 @@ def pmf(self, n): # TODO: Write cdf method based on cdf of plognorm, similar to above +class canonical_lognorm(Distribution): + __doc__ = Distribution.__doc__ + \ + ''' + Description + ------------ + Lognormal distribution + + Parameters + ---------- + mu : float + The mu parameter of the log normal + sigma : float + The sigma parameter of the log normal + n_samp : int or iterable (optional) + Total number of species / samples + tot_obs: int or iterable (optional) + Total number of individuals / observations + + self.var keywords + ----------------- + mu : list of floats + The mu parameter of the lognormal calculated with + np.log(tot_obs / n_samp) - (sigma**2 / 2). + sigma : list of float + The sigma parameter of the log normal + + Notes + ----- + Currently, lognormal is implemented so that mu is calculated using tot_obs, + n_samp, and sigma. While, mu can be passed in as a keyword argument, this + mu will be ignored. + + ''' + + @doc_inherit + def __init__(self, **kwargs): + self.params = kwargs + self.min_supp = 1 + self.par_num = 2 + self.var = {} + + @doc_inherit + def pmf(self, n): + + # Get parameters + tot_obs, n_samp = self.get_params(['tot_obs','n_samp']) + n = expand_n(n, len(tot_obs)) + + # Calculate sigma + sigma = np.sqrt((2 * np.log(n_samp)) / np.log(2)**2) + + # Calculate mu + mu = np.log(tot_obs / n_samp) - (sigma**2 / 2) + self.var['mu'] = mu + self.var['sigma'] = sigma + + # Calculate pmf + pmf = [] + for tmu, tsigma, tn in zip(mu, sigma, n): + tpmf = stats.lognorm.pdf(tn, tsigma, scale=np.exp(tmu)) + pmf.append(tpmf) + + return pmf + + @doc_inherit + def cdf(self, n): + + # Get parameters + tot_obs, n_samp = self.get_params(['tot_obs','n_samp']) + n = expand_n(n, len(tot_obs)) + + # Calculate sigma + sigma = np.sqrt((2 * np.log(n_samp)) / np.log(2)**2) + + # Calculate mu + mu = np.log(tot_obs / n_samp) - (sigma**2 / 2) + self.var['mu'] = mu + self.var['sigma'] = sigma + + #Calculate cdf + cdf = [] + for tmu, tsigma, tn in zip(mu, sigma, n): + tcdf = stats.lognorm.cdf(tn, tsigma, scale=np.exp(tmu)) + cdf.append(tcdf) + + return cdf + class lognorm(Distribution): __doc__ = Distribution.__doc__ + \ @@ -2602,12 +2689,11 @@ def pmf(self, n): # Get parameters n_samp, tot_obs = self.get_params(['n_samp', 'tot_obs']) n = expand_n(n, len(n_samp)) - - # TODO: Additional checks? + + # Define normalizing constant and pmf functions + z_func = lambda x, ttot_obs: (1 - x ** (ttot_obs + 1)) / (1 - x) + pmf_func = lambda z, x, tn: (1 / z) * (x ** tn) - #NOTE: Overflow warning but not affecting results - eq = lambda x, N, a: ((x / (1 - x)) - (((N + 1) * x ** (N + 1)) / \ - (1 - x ** (N + 1)))) - (N * a) pmf = [] self.var['x'] = [] for tn_samp, ttot_obs, tn in zip(n_samp, tot_obs, n): @@ -2624,25 +2710,49 @@ def pmf(self, n): tpmf[np.where(tn == ttot_obs)[0]] = 1 x = 0 + elif ta < 0.5: + try: + stop = 1 - 1e-10 + # This is very brittle for some reason. 
Changing the stop
+                    # value can make this fail for strange reasons
+                    x = scipy.optimize.brentq(l_solver, 0, .999999,
+                                   args=(ttot_obs, ta), disp=False)
+                except:
+                    try:
+                        x = scipy.optimize.brentq(l_solver, 0, .95,
+                                   args=(ttot_obs, ta), disp=False)
+                    except:
+                        raise ValueError("No solution to " +
+                                "%s.pmf when tot_obs = " %
+                                (self.__class__.__name__) +
+                                "%.2f, n_samp = %.10f and a = %.10f" %
+                                (ttot_obs, tn_samp, ta))
+                z = z_func(x, ttot_obs)
+                tpmf = pmf_func(z, x, tn)
             else:
                 try:
-                    x = scipy.optimize.brentq(eq, 0, min((sys.float_info[0] *
-                        ta)**(1/float(ttot_obs)), 8), args=(ttot_obs, ta),
-                        disp=False, xtol=1e-60)
+                    x = scipy.optimize.brentq(l_solver, 0,
+                        min((sys.float_info[0] * ta)**(1/float(ttot_obs)),
+                        8), args=(ttot_obs, ta), disp=False,
+                        xtol=1e-60, maxiter=200)
+
                 except:
                     try:
                         # Allows it to pass, but optimizer starts rounding.
                         # Not Sure why it is doing this.
-                        x = scipy.optimize.brentq(eq, 8.0, 50.0, \
-                            args=(ttot_obs, ta), disp=False, xtol=1e-60)
+                        x = scipy.optimize.brentq(l_solver, 8.0, 50.0, \
+                            args=(ttot_obs, ta), disp=False, xtol=1e-60,
+                            maxiter=200)
                     except:
-                        raise ValueError("No solution to %s.pmf when tot_obs = " %
-                        (self.__class__.__name__) +
-                        "%.2f, n_samp = %.10f and a = %.10f" %
-                        (ttot_obs, tn_samp, ta))
-                z = (1 - x ** (ttot_obs + 1)) / (1 - x)
-                tpmf = (1 / z) * (x ** tn)
+                        raise ValueError("No solution to " +
+                                "%s.pmf when tot_obs = " %
+                                (self.__class__.__name__) +
+                                "%.2f, n_samp = %.10f and a = %.10f" %
+                                (ttot_obs, tn_samp, ta))
+                z = z_func(x, ttot_obs)
+                tpmf = pmf_func(z, x, tn)
 
             pmf.append(tpmf)
             self.var['x'].append(x)
@@ -4030,6 +4140,28 @@ def nu_pmf_eq(es, beta, l2, s):
     return (1 / np.log(s / beta)) * (np.exp(-beta / (l2 * (es - 1)))) / \
                                     (es - 1)
 
+def l_solver(x, N, a):
+    """
+    Used with a solver to get the Lagrange multiplier for a pi distribution
+
+    Parameters
+    ----------
+    x : float
+        Lagrange multiplier x = e**-lambda
+    N : float
+        total balls (individuals) in urn (species)
+    a : float
+        area fraction. 1 / n_samp or 1 / urn_number
+
+    Returns
+    -------
+    : float
+        Value of the constraint equation; equals zero at the desired x
+
+    """
+    return ((x / (1 - x)) - (((N + 1) * x ** (N + 1)) / \
+            (1 - x ** (N + 1)))) - (N * a)
+
 def beta_solver(x, k, tot_obs, n_samp):
     """ Used with a solver to get the beta lagrange multiplier in the METE
     distributions. With a solver, this function
diff --git a/test_distributions.py b/test_distributions.py
index 3120f73..63e14d8 100644
--- a/test_distributions.py
+++ b/test_distributions.py
@@ -577,10 +577,35 @@ def test_fgeo(self):
 
     def test_tgeo(self):
 
+        # Test against values from Harte 2011
+        x_vals = [0.333, 0.434, .568, .707, .823, .901]
+        tg = tgeo(tot_obs=[1,2,4,8,16,32], n_samp=4)
+        tg.pmf(0)
+        pred_vals = np.round(tg.var['x'], 3)
+        print pred_vals
+        self.assertTrue(np.array_equal(x_vals, pred_vals))
+
+        # In Harte 2011 .143 is given as .125, but this is a mistake.
Every + # other value is exactly as expected from teh the book + x_vals = [0.143, .220, .344, .505, .669, .801] + tg = tgeo(tot_obs=[1,2,4,8,16,32], n_samp=8) + tg.pmf(0) + pred_vals = np.round(tg.var['x'], 3) + print pred_vals + self.assertTrue(np.array_equal(x_vals, pred_vals)) + + x_vals = [0.067, .115, .201, .334, .5, .667] + tg = tgeo(tot_obs=[1,2,4,8,16,32], n_samp=16) + tg.pmf(0) + pred_vals = np.round(tg.var['x'], 3) + print pred_vals + self.assertTrue(np.array_equal(x_vals, pred_vals)) + # Test tgeo cdf is one dist = tgeo(n_samp=10, tot_obs=2345) self.assertTrue(np.round(dist.cdf(2345)[0][0], decimals=1) == 1.0) - + + # When n_samp < 2 weird things happen # Testing Lagrange multiplier against values generated by hand # [(n=60, a=.1), (n=340, a=.6), (n=34, a=.9), (n=12, a=.9), (n=2, .9), # (n=1, a=.1),(n=1, a=0.0001), @@ -602,8 +627,16 @@ def test_tgeo(self): tg = tgeo(tot_obs=[1,10], n_samp=[1/.9, 1/.99]) tg.pmf(0) pred_vals = np.round(tg.var['x'], decimals=4) + print pred_vals self.assertTrue(np.array_equal(x_vals, pred_vals)) + # Test a case that was failing for Erica Newman + x_val = [.9896] + tg = tgeo(tot_obs=341, n_samp=4) + tg.pmf(0) + print tg.var['x'] + self.assertTrue(np.round(tg.var['x'], 4) == x_val[0]) + # Test that pdf and cdf give correct values check = dist.pmf([1,1,2,3,4,5,12,34,65]) self.assertTrue(dist.cdf(0)[0][0] == dist.pmf(0)[0][0]) From a108c4d5b25dbbb87191464fd4d532fbbb629495 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 11 Feb 2014 09:05:52 -0800 Subject: [PATCH 020/343] Added variable printing in output --- output.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/output.py b/output.py index c5f631d..f5c3e22 100644 --- a/output.py +++ b/output.py @@ -244,7 +244,8 @@ def write_summary_table(self, smry, criteria=None, species=None): dt_rare['<=' + str(mins)] = dt['tot_min'][mins][i] dt_vars = {} for key in dt['vars'].iterkeys(): - dt_vars[key] = dt['vars'][key][i] + if len(dt['vars'][key]) != 0: + dt_vars[key] = dt['vars'][key][i] fout.write('PREDICTED DISTRIBUTION : ' + kw + '\n' + self.urns + ' = ' + str(dt['urns'][i]) + '\n' + From 588283a73840605c820b01fce9b7185d85d936ab Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 14 Feb 2014 14:53:28 -0800 Subject: [PATCH 021/343] Update pair_dist with while loop for points --- empirical.py | 156 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 121 insertions(+), 35 deletions(-) diff --git a/empirical.py b/empirical.py index f9183d1..7c4f509 100644 --- a/empirical.py +++ b/empirical.py @@ -35,6 +35,7 @@ from copy import deepcopy from data import DataTable import scipy.spatial.distance as dist +import shapely.geometry as geo class Patch: @@ -553,9 +554,9 @@ def comm_sep(self, plot_locs, criteria, loc_unit=None): return result - def pair_dist(self, div_cols, bin_edges, criteria, edge_correct=True, - n0_min_max=None): - """ + def pair_dist(self, div_cols, bin_edges, criteria, edge_correct=False, + n0_min_max=None, density=False): + ''' Calculates pairwise distances between individuals of a species. Parameters @@ -569,74 +570,159 @@ def pair_dist(self, div_cols, bin_edges, criteria, edge_correct=True, edge_correct : bool Correct histograms by replacing count of individuals at distance bin with expected count if entire ring at that distance was - available (part of ring may fall outside of plot). Default True. + available (part of ring may fall outside of plot). Default False. n0_min_max : tuple Optional min and max abundance for species to consider. 
Useful for ignoring rare species with few samples and abundant species for which calculation would take a long time. + density : bool + If True, return densities (counts divided by area of torus defined + by bin edges) instead of counts. Default False. Returns ------- result : tuple - Tuple with two elements. First is list of combinations used to - generate result. Second is another tuple with first element giving - list of species and second element giving list of histograms of - pairwise distances for that species. + List of tuples with three elements each. First is combination used + to generate results, second is spp_list for that combination + (includes all species in entire landscape), and third is list of + length spp_list giving histogram of pairwise distances for each + species. - """ + ''' spp_list, spp_col, count_col, engy_col, mass, combinations = \ self.parse_criteria(criteria) - result = [] + bin_edges = np.array(bin_edges) + + result_list = [] + for comb in combinations: + # If comb includes division, cannot also use edge correction + # This would require better parsing of plot boundaries for division + if (not comb.keys() == []) and edge_correct: + raise NotImplementedError("Edge correction cannot be used " + "with combinations at present.") + # Get appropriate subtable for this combination subtable = self.data_table.get_subtable(comb) + # Declare empty list for all histograms for all species + spp_hist_list = [] + + # Set up plot polygon for edge correction + if edge_correct: + xmin = self.data_table.meta[(div_cols[0], 'minimum')] + xmax = self.data_table.meta[(div_cols[0], 'maximum')] + ymin = self.data_table.meta[(div_cols[1], 'minimum')] + ymax = self.data_table.meta[(div_cols[1], 'maximum')] + + plot = geo.box(xmin, ymin, xmax, ymax) + + all_r = (bin_edges[:-1] + bin_edges[1:]) / 2 + + # Calculate areas of all toruses + if density: + assert edge_correct, 'Edge correct must be used with density' + areas = [] + for i in range(len(bin_edges) - 1): + areas.append(np.pi*(bin_edges[i+1]**2 - bin_edges[i]**2)) + areas = np.array(areas) + # Loop all species for spp in spp_list: spp_subtable = subtable[subtable[spp_col] == spp] + # If spp not in this combination, continue + if len(spp_subtable) == 0: + spp_hist_list.append(None) + continue + # Get n0, accounting for count col if count_col: count = np.sum(spp_subtable[count_col]) else: count = len(spp_subtable) - # Skip this spp if no min max or n0 outside of range + # Skip this spp if there is a min_max set and n0 out of range if n0_min_max and (count < n0_min_max[0] or count > n0_min_max[1]): + spp_hist_list.append(None) continue - # Get list of all points + # Get list of all points and all counts x = spp_subtable[div_cols[0]] y = spp_subtable[div_cols[1]] all_points = zip(x,y) - - # If n0 < 1e5, get all pairwise distances at once - if count < 1e5: - all_dist = dist.pdist(all_points) - hist, _ = np.histogram(all_points, bin_edges) - - # If n0 > 1e5, loop individuals (all dist too large) - # TODO: Write unit test to test this - else: - hist = np.array(len(bin_edges) - 1) - for i, point in enumerate(all_points()): - - # Skip current index - this_point = all_points[i] - all_other_points = all_points[:i] + all_points[i+1:] - - # Get dist from this point to all other points - other_dist = dist.cdist(this_point, all_other_points) - hist += other_dist - - result.append(hist) - - return result + all_counts = list(spp_subtable[count_col]) + + # Declare array to hold histogram of pairwise distances + all_hist = np.zeros(len(bin_edges) - 
1) + + # Go through all_points until only one left + while len(all_points) > 1: + + # Get this point and remove from list of all points + this_point = all_points.pop(0) + print this_point + this_count = all_counts.pop(0) + + # Get dist from this point to all other points + other_dist = dist.cdist(np.array([this_point]), + np.array(all_points)) + + # Repeat other point distances to acccount for their counts + other_dist = np.repeat(other_dist, all_counts) + + # Repeat entire other_dist to account for count here + other_dist = np.tile(other_dist, this_count) + print other_dist + + # Add 0 distances between individs at this point + n_this_dists = this_count * (this_count-1) / 2 + if n_this_dists > 0: + other_dist = np.concatenate((other_dist, + np.zeros(n_this_dists))) + + hist, _ = np.histogram(other_dist, bin_edges) + + # Edge correct distance + if edge_correct: + corr_fact = np.zeros(len(all_r)) + for i, r in enumerate(all_r): + x, y = this_point + circ = 2 * np.pi * r + ring = geo.Point(x,y).buffer(r,resolution=128) + out_len = ring.boundary.difference(plot).length + in_frac = ((ring.boundary.length - out_len) / + ring.boundary.length) + corr_fact[i] = in_frac + hist = hist / corr_fact + + # Add this point results to main histogram + print hist + all_hist += hist + print all_hist + + # Account for distance between counts at last point if needed + if all_counts[0] > 1: + n_this_dists = all_counts[0] * (all_counts[0]-1) / 2 + hist, _ = np.histogram(np.zeros(n_this_dists), bin_edges) + all_hist += hist + + # If density, divide all values by area of torus between bins + if density: + all_hist = all_hist / np.array(areas) + + # Append final hist for this species to running list + spp_hist_list.append(all_hist) + + # For this comb, create and append tuple to result list + result_list.append((comb, spp_list, spp_hist_list)) + + return result_list def ied(self, criteria, normalize=True, exponent=0.75): From cf32a2e4a9e8036e92457de06be655de6484c53e Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 16 Feb 2014 19:34:28 -0800 Subject: [PATCH 022/343] Complete and tested o_ring method for Patch (formerly pair_dist) --- empirical.py | 107 +++++++++++++++++++++++++++++----------------- test_empirical.py | 48 ++++++++++++++++++--- 2 files changed, 109 insertions(+), 46 deletions(-) diff --git a/empirical.py b/empirical.py index 7c4f509..9db47ad 100644 --- a/empirical.py +++ b/empirical.py @@ -554,10 +554,10 @@ def comm_sep(self, plot_locs, criteria, loc_unit=None): return result - def pair_dist(self, div_cols, bin_edges, criteria, edge_correct=False, - n0_min_max=None, density=False): + def o_ring(self, div_cols, bin_edges, criteria, n0_min_max=None, + edge_correct=False, density=False): ''' - Calculates pairwise distances between individuals of a species. + Calculates univariate O-ring for a species. Parameters ---------- @@ -566,15 +566,15 @@ def pair_dist(self, div_cols, bin_edges, criteria, edge_correct=False, bin_edges : iterable List of edges of distance classes to bin histogram of distances criteria : dict - See docstring for Patch.sad. - edge_correct : bool - Correct histograms by replacing count of individuals at distance - bin with expected count if entire ring at that distance was - available (part of ring may fall outside of plot). Default False. + See docstring for Patch.sad. Count column must be used. n0_min_max : tuple Optional min and max abundance for species to consider. 
Useful for ignoring rare species with few samples and abundant species for which calculation would take a long time. + edge_correct : bool + Correct histograms by replacing count of individuals at distance + bin with expected count if entire ring at that distance was + available (part of ring may fall outside of plot). Default False. density : bool If True, return densities (counts divided by area of torus defined by bin edges) instead of counts. Default False. @@ -588,6 +588,21 @@ def pair_dist(self, div_cols, bin_edges, criteria, edge_correct=False, length spp_list giving histogram of pairwise distances for each species. + Notes + ----- + Pairwise distances are directional, giving n(n-1) total distances, as + edge correction is directional. + + If there are no records in a combination, histogram will be None. If + there are records but a species has only one individual, histogram + will be all zeros. + + When using edge_correct or density, the maximum distance used for edge + correction, given by the mean of the last two bin_edge values, should + be no greater than one half the longer dimension of the plot. This + ensures that it is not possible for an entire edge correction buffer + to be outside of the plot, which could lead to divide by zero errors. + ''' spp_list, spp_col, count_col, engy_col, mass, combinations = \ @@ -603,7 +618,7 @@ def pair_dist(self, div_cols, bin_edges, criteria, edge_correct=False, # This would require better parsing of plot boundaries for division if (not comb.keys() == []) and edge_correct: raise NotImplementedError("Edge correction cannot be used " - "with combinations at present.") + "with combinations.") # Get appropriate subtable for this combination subtable = self.data_table.get_subtable(comb) @@ -611,6 +626,10 @@ def pair_dist(self, div_cols, bin_edges, criteria, edge_correct=False, # Declare empty list for all histograms for all species spp_hist_list = [] + # If density is True, set edge_correct to True + if density: + edge_correct = True + # Set up plot polygon for edge correction if edge_correct: xmin = self.data_table.meta[(div_cols[0], 'minimum')] @@ -624,18 +643,19 @@ def pair_dist(self, div_cols, bin_edges, criteria, edge_correct=False, # Calculate areas of all toruses if density: - assert edge_correct, 'Edge correct must be used with density' - areas = [] + ring_areas = [] for i in range(len(bin_edges) - 1): - areas.append(np.pi*(bin_edges[i+1]**2 - bin_edges[i]**2)) - areas = np.array(areas) + ring_areas.append(np.pi*(bin_edges[i+1]**2 - + bin_edges[i]**2)) + ring_areas = np.array(ring_areas) # Loop all species for spp in spp_list: spp_subtable = subtable[subtable[spp_col] == spp] - # If spp not in this combination, continue + # If spp not present or singleton, continue + # Ensure that if single record but count > 1, do analysis if len(spp_subtable) == 0: spp_hist_list.append(None) continue @@ -661,31 +681,43 @@ def pair_dist(self, div_cols, bin_edges, criteria, edge_correct=False, # Declare array to hold histogram of pairwise distances all_hist = np.zeros(len(bin_edges) - 1) - # Go through all_points until only one left - while len(all_points) > 1: + # Declare array to hold all sampled areas per bin + if density: + all_areas = np.zeros(len(ring_areas)) + + # Go through all_points + for i, this_point in enumerate(all_points): # Get this point and remove from list of all points - this_point = all_points.pop(0) - print this_point - this_count = all_counts.pop(0) + this_count = all_counts[i] + + # Create list of all other points and counts 
except this + all_other_points = all_points[0:i] + all_points[i+1:] + all_other_counts = all_counts[0:i] + all_counts[i+1:] # Get dist from this point to all other points - other_dist = dist.cdist(np.array([this_point]), - np.array(all_points)) + # If no other points, other_dist is empty + # May still be other individs at this point + if all_other_points: + other_dist = dist.cdist(np.array([this_point]), + np.array(all_other_points)) + else: + other_dist = np.array(()) # Repeat other point distances to acccount for their counts - other_dist = np.repeat(other_dist, all_counts) + other_dist = np.repeat(other_dist, all_other_counts) # Repeat entire other_dist to account for count here other_dist = np.tile(other_dist, this_count) - print other_dist # Add 0 distances between individs at this point - n_this_dists = this_count * (this_count-1) / 2 + # Multiplied by two to get directional pairwise dists + n_this_dists = this_count - 1 if n_this_dists > 0: other_dist = np.concatenate((other_dist, - np.zeros(n_this_dists))) + np.zeros(n_this_dists*2))) + # Calculate histogram of distances to other points hist, _ = np.histogram(other_dist, bin_edges) # Edge correct distance @@ -693,28 +725,23 @@ def pair_dist(self, div_cols, bin_edges, criteria, edge_correct=False, corr_fact = np.zeros(len(all_r)) for i, r in enumerate(all_r): x, y = this_point - circ = 2 * np.pi * r - ring = geo.Point(x,y).buffer(r,resolution=128) - out_len = ring.boundary.difference(plot).length - in_frac = ((ring.boundary.length - out_len) / - ring.boundary.length) + circ = geo.Point(x,y).buffer(r,resolution=64) + out_len = circ.boundary.difference(plot).length + in_frac = ((circ.boundary.length - out_len) / + circ.boundary.length) corr_fact[i] = in_frac hist = hist / corr_fact - # Add this point results to main histogram - print hist - all_hist += hist - print all_hist + # Store sampled area at each dist for density calculation + if density: + all_areas += (ring_areas * corr_fact) - # Account for distance between counts at last point if needed - if all_counts[0] > 1: - n_this_dists = all_counts[0] * (all_counts[0]-1) / 2 - hist, _ = np.histogram(np.zeros(n_this_dists), bin_edges) + # Add this point results to main histogram all_hist += hist - # If density, divide all values by area of torus between bins + # If density, divide all values by summed sampled torus areas if density: - all_hist = all_hist / np.array(areas) + all_hist = all_hist / all_areas # Append final hist for this species to running list spp_hist_list.append(all_hist) diff --git a/test_empirical.py b/test_empirical.py index 5fd53f7..c1fd442 100644 --- a/test_empirical.py +++ b/test_empirical.py @@ -11,7 +11,6 @@ from empirical import * import numpy as np - class TestPatch(unittest.TestCase): def setUp(self): @@ -21,7 +20,7 @@ def setUp(self): grt, .1, .2, 1 grt, .1, .3, 1 rty, .1, .2, 1 -rty, .2, .3, 1''') +rty, .2, .3, 2''') self.xyfile5.close() self.xymeta5 = {('x', 'maximum'): .2, ('x', 'minimum'): .1, ('x', 'precision'): .1, ('x', 'type'): 'interval', ('y', 'maximum'): .3, @@ -297,10 +296,10 @@ def test_sad(self): # Test correct result with 'whole' and one division sad = self.pat1.sad({'spp_code': 'species', 'count': 'count', 'x': 1}) - self.assertTrue(np.array_equal(sad[0][1], np.array([4,2]))) + self.assertTrue(np.array_equal(sad[0][1], np.array([4,3]))) sad = self.pat1.sad({'spp_code': 'species', 'count': 'count', 'x': 'whole'}) - self.assertTrue(np.array_equal(sad[0][1], np.array([4,2]))) + self.assertTrue(np.array_equal(sad[0][1], np.array([4,3]))) sad = 
self.pat4.sad({'spp_code': 'species', 'count' :'count', 'x': 1}) self.assertTrue(np.array_equal(sad[0][2], np.array([0,1,2,3]))) @@ -465,6 +464,45 @@ def test_comm_sep(self): jac_sort = np.sort(comm['jaccard']) np.testing.assert_array_almost_equal(jac_sort, np.array((0,0,0.4)), 5) + def test_o_ring(self): + + # Check standard case, no min max, no edge correction, no criteria + # Tests that distances and repeats for count col are correct + result_list = self.pat1.o_ring(('x','y'), [0,.11,.2], + {'spp_code': 'species', 'count': 'count'}) + + np.testing.assert_array_equal(result_list[0][2][0], np.array((8,4))) + np.testing.assert_array_equal(result_list[0][2][1], np.array((2,4))) + + # Check standard case, no min max, no edge correction, with division + result_list = self.pat1.o_ring(('x','y'), [0,.11,.2], + {'spp_code': 'species', 'count': 'count', + 'y': 2}) + + # - First half of y, both species + np.testing.assert_array_equal(result_list[0][2][0], np.array((6,0))) + np.testing.assert_array_equal(result_list[0][2][1], np.array((0,0))) + + # - Second half of y, both species + np.testing.assert_array_equal(result_list[1][2][0], np.array((0,0))) + np.testing.assert_array_equal(result_list[1][2][1], np.array((2,0))) + + # Check edge correction - check only first species + # Almost equal required due to float division + result_list = self.pat1.o_ring(('x','y'), [0,.05,.1], + {'spp_code': 'species', 'count': 'count'}, + edge_correct=True) + np.testing.assert_array_almost_equal(result_list[0][2][0], + np.array((8,18))) + + # Check density - check only second species + print 'here ' + result_list = self.pat1.o_ring(('x','y'), [0,.05,.1], + {'spp_code': 'species', 'count': 'count'}, + density=True) + np.testing.assert_array_almost_equal(result_list[0][2][1], + np.array((1358.12218105,0))) + def test_ssad(self): # Check that ssad does not lose any individuals @@ -538,5 +576,3 @@ def test_sed(self): if __name__ == "__main__": unittest.main() - - From 5fc4d7ed6882421476e7424f5443736fc728a8d6 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 16 Feb 2014 21:09:27 -0800 Subject: [PATCH 023/343] Bump year in LICENSE and give plain txt extension --- LICENSE.md => LICENSE.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename LICENSE.md => LICENSE.txt (94%) diff --git a/LICENSE.md b/LICENSE.txt similarity index 94% rename from LICENSE.md rename to LICENSE.txt index af4f37c..5081918 100644 --- a/LICENSE.md +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2013, The Regents of the University of California +Copyright (c) 2013-2014, The Regents of the University of California All rights reserved. 
Redistribution and use in source and binary forms, with or without From 7ba537398c8eb4b9b64c38bf1cdef2f235172c81 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 16 Feb 2014 21:04:03 -0800 Subject: [PATCH 024/343] Move py files to subdir to prepare for setuptools --- __init__.py => macroeco/__init__.py | 0 compare.py => macroeco/compare.py | 0 data.py => macroeco/data.py | 0 distributions.py => macroeco/distributions.py | 0 empirical.py => macroeco/empirical.py | 0 output.py => macroeco/output.py | 0 test_compare.py => macroeco/test_compare.py | 0 test_data.py => macroeco/test_data.py | 0 test_distributions.py => macroeco/test_distributions.py | 0 test_empirical.py => macroeco/test_empirical.py | 0 {utils => macroeco/utils}/__init__.py | 0 {utils => macroeco/utils}/docinherit.py | 0 {utils => macroeco/utils}/form_func.py | 0 {utils => macroeco/utils}/format_data.py | 0 {utils => macroeco/utils}/global_strings.py | 0 {utils => macroeco/utils}/make_metadata.py | 0 {utils => macroeco/utils}/metadata_writer.py | 0 {utils => macroeco/utils}/test_form_func.py | 0 {utils => macroeco/utils}/test_format_data.py | 0 {utils => macroeco/utils}/test_metadata_writer.py | 0 {utils => macroeco/utils}/test_workflow.py | 0 {utils => macroeco/utils}/workflow.py | 0 22 files changed, 0 insertions(+), 0 deletions(-) rename __init__.py => macroeco/__init__.py (100%) rename compare.py => macroeco/compare.py (100%) rename data.py => macroeco/data.py (100%) rename distributions.py => macroeco/distributions.py (100%) rename empirical.py => macroeco/empirical.py (100%) rename output.py => macroeco/output.py (100%) rename test_compare.py => macroeco/test_compare.py (100%) rename test_data.py => macroeco/test_data.py (100%) rename test_distributions.py => macroeco/test_distributions.py (100%) rename test_empirical.py => macroeco/test_empirical.py (100%) rename {utils => macroeco/utils}/__init__.py (100%) rename {utils => macroeco/utils}/docinherit.py (100%) rename {utils => macroeco/utils}/form_func.py (100%) rename {utils => macroeco/utils}/format_data.py (100%) rename {utils => macroeco/utils}/global_strings.py (100%) rename {utils => macroeco/utils}/make_metadata.py (100%) rename {utils => macroeco/utils}/metadata_writer.py (100%) rename {utils => macroeco/utils}/test_form_func.py (100%) rename {utils => macroeco/utils}/test_format_data.py (100%) rename {utils => macroeco/utils}/test_metadata_writer.py (100%) rename {utils => macroeco/utils}/test_workflow.py (100%) rename {utils => macroeco/utils}/workflow.py (100%) diff --git a/__init__.py b/macroeco/__init__.py similarity index 100% rename from __init__.py rename to macroeco/__init__.py diff --git a/compare.py b/macroeco/compare.py similarity index 100% rename from compare.py rename to macroeco/compare.py diff --git a/data.py b/macroeco/data.py similarity index 100% rename from data.py rename to macroeco/data.py diff --git a/distributions.py b/macroeco/distributions.py similarity index 100% rename from distributions.py rename to macroeco/distributions.py diff --git a/empirical.py b/macroeco/empirical.py similarity index 100% rename from empirical.py rename to macroeco/empirical.py diff --git a/output.py b/macroeco/output.py similarity index 100% rename from output.py rename to macroeco/output.py diff --git a/test_compare.py b/macroeco/test_compare.py similarity index 100% rename from test_compare.py rename to macroeco/test_compare.py diff --git a/test_data.py b/macroeco/test_data.py similarity index 100% rename from test_data.py rename to 
macroeco/test_data.py diff --git a/test_distributions.py b/macroeco/test_distributions.py similarity index 100% rename from test_distributions.py rename to macroeco/test_distributions.py diff --git a/test_empirical.py b/macroeco/test_empirical.py similarity index 100% rename from test_empirical.py rename to macroeco/test_empirical.py diff --git a/utils/__init__.py b/macroeco/utils/__init__.py similarity index 100% rename from utils/__init__.py rename to macroeco/utils/__init__.py diff --git a/utils/docinherit.py b/macroeco/utils/docinherit.py similarity index 100% rename from utils/docinherit.py rename to macroeco/utils/docinherit.py diff --git a/utils/form_func.py b/macroeco/utils/form_func.py similarity index 100% rename from utils/form_func.py rename to macroeco/utils/form_func.py diff --git a/utils/format_data.py b/macroeco/utils/format_data.py similarity index 100% rename from utils/format_data.py rename to macroeco/utils/format_data.py diff --git a/utils/global_strings.py b/macroeco/utils/global_strings.py similarity index 100% rename from utils/global_strings.py rename to macroeco/utils/global_strings.py diff --git a/utils/make_metadata.py b/macroeco/utils/make_metadata.py similarity index 100% rename from utils/make_metadata.py rename to macroeco/utils/make_metadata.py diff --git a/utils/metadata_writer.py b/macroeco/utils/metadata_writer.py similarity index 100% rename from utils/metadata_writer.py rename to macroeco/utils/metadata_writer.py diff --git a/utils/test_form_func.py b/macroeco/utils/test_form_func.py similarity index 100% rename from utils/test_form_func.py rename to macroeco/utils/test_form_func.py diff --git a/utils/test_format_data.py b/macroeco/utils/test_format_data.py similarity index 100% rename from utils/test_format_data.py rename to macroeco/utils/test_format_data.py diff --git a/utils/test_metadata_writer.py b/macroeco/utils/test_metadata_writer.py similarity index 100% rename from utils/test_metadata_writer.py rename to macroeco/utils/test_metadata_writer.py diff --git a/utils/test_workflow.py b/macroeco/utils/test_workflow.py similarity index 100% rename from utils/test_workflow.py rename to macroeco/utils/test_workflow.py diff --git a/utils/workflow.py b/macroeco/utils/workflow.py similarity index 100% rename from utils/workflow.py rename to macroeco/utils/workflow.py From 04f13ad41c427b9d020e8e3a27674ad0990e4b7f Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 16 Feb 2014 21:07:14 -0800 Subject: [PATCH 025/343] Add basic setup.py --- setup.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 setup.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..4cd08b7 --- /dev/null +++ b/setup.py @@ -0,0 +1,14 @@ +try: + from setuptools import setup +except ImportError: + from distutils.core import setup + +setup( + name = 'macroeco', + version= '0.3', + description = 'Analysis of ecological patterns in Python', + author = 'Justin Kitzes, Mark Wilber, Chloe Lewis', + url = 'https://github.com/jkitzes/macroeco', + packages = ['macroeco', 'macroeco.utils'], + license = 'BSD', +) From 9bd18e361b336afaa58bc2251a271854632e3b7c Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 25 Feb 2014 10:56:58 -0800 Subject: [PATCH 026/343] Correct two typos in empirical docstrings for Sphinx rendering --- macroeco/empirical.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/macroeco/empirical.py b/macroeco/empirical.py index ed9b7ba..5a0a58c 100644 --- a/macroeco/empirical.py +++ 
b/macroeco/empirical.py @@ -197,7 +197,6 @@ def sad(self, criteria, clean=False): Value has a different meaning depending on column type: - metric - number of divisions of data along this axis, int/float - categorical - 'split' calculates each category separately, - 'whole' takes the entire column. clean : bool If True, all the zeros are removed from the sads. If False, sads are left as is. @@ -864,8 +863,8 @@ def sed(self, criteria, normalize=True, exponent=0.75, clean=False): keyword looks up a np.array that contains the given species energy distribution. - Note - ---- + Notes + ----- The theta distribution from Harte (2011) is a an sed. ''' From 784dad6141cb1f614633239a688f51b3e92d8fc7 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 25 Feb 2014 11:01:27 -0800 Subject: [PATCH 027/343] Update gitignore to only ignore generated docs --- .gitignore | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index 2f1c545..3cf34a5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,25 +1 @@ -.DS_Store -*.aux -*.bbl -*.blg -*.log -*.fdb_latexmk -*.gz -~$* -*.m~ -*.swp -*.swo -*.pyc -tags -.#* -projects/sample_script/*results.txt -code/convert_mat_to_xy.pyc -logfile.txt -projects/compare_sad/*0.png -projects/compare_sad/*0.csv -projects/compare_sad/*0.txt -projects/compare_sar/*.png -projects/compare_sar/*.csv -projects/sample_script/*0.png -projects/sample_script/*0.csv -projects/sample_script/*0.txt +*generated* From 5530632d2c425dec0289e8163a64d0193185f101 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 25 Feb 2014 11:03:08 -0800 Subject: [PATCH 028/343] Add draft docs including themes, templates, extensions --- doc/Makefile | 154 ++++++++++++++++ doc/_templates/.DS_Store | Bin 0 -> 6148 bytes doc/_templates/autosummary/.DS_Store | Bin 0 -> 6148 bytes doc/_templates/autosummary/class.rst | 27 +++ doc/_templates/layout.html | 24 +++ doc/conf.py | 255 +++++++++++++++++++++++++++ doc/distributions2.rst | 1 + doc/empirical.rst | 12 ++ doc/index.rst | 23 +++ doc/make.bat | 190 ++++++++++++++++++++ 10 files changed, 686 insertions(+) create mode 100644 doc/Makefile create mode 100644 doc/_templates/.DS_Store create mode 100644 doc/_templates/autosummary/.DS_Store create mode 100755 doc/_templates/autosummary/class.rst create mode 100644 doc/_templates/layout.html create mode 100644 doc/conf.py create mode 100644 doc/distributions2.rst create mode 100644 doc/empirical.rst create mode 100644 doc/index.rst create mode 100644 doc/make.bat diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..34e16a5 --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,154 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build +SUBDIR = generated + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
+ +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + -rm -rf $(BUILDDIR)/* $(SUBDIR)/ + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/macroeco.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/macroeco.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/macroeco" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/macroeco" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." 
+ $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." diff --git a/doc/_templates/.DS_Store b/doc/_templates/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..8baf407a092f545d912839263f5fe9d08090386c GIT binary patch literal 6148 zcmeHK%}N6?5Kh{vy9&C3g5GlT(!+{B;K`-dlNVvd9#nLfEq0-9q`Pa8TG*q%kKj@C zk@RhxNm5a&HxZSYF!?5vnWXtrk_}^w*YaqQF^4f`fg+Y{s6G+wM;(%q@gQ6Nl*uXk zz;DT}p9NW-)`HF+wzh+;6&ia-%J!Qbgu8LGd+5w;$RrEmq}fr)anyv6{YIQbvRjjR z5~cgTb~fTDYpFqv!?vItIaO+s&b@8TFD$KZh4SL^>iXvP z?%`2$&f)K=WZK{Wp1}BG`DcD7Y0KmoA_m!mEJ9*{7$64zI0NQbvnKxdI_T#U1H{1h zGl1uV07di+78=#j0Tq5eVt)k@1#EmvAPR$?!9pYSfN-4(s8hLlVsM=feqrJ~gM~(& z&bXQx#xXN<^FrZjcJK=o&bX(MYGQyG_{cz6HyynHH@|-We@vnlF+dFbD+YL_?$>Lu zBy+b8Ee`Km0eS?Af^mh$w-hkcRSdCs70-bx0lz>4&@)(Q1P=&(2q+q;A_jhxfmc6D BTw4GD literal 0 HcmV?d00001 diff --git a/doc/_templates/autosummary/.DS_Store b/doc/_templates/autosummary/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0{{ shorttitle|e }} +{%- endblock %} + +{# Use our name in header #} +{%- block header %} +{% if theme_scipy_org_logo %} +
+
+ + SciPy +
+
+ +{% else %} +
+
+
+
+{% endif %} +{% endblock %} diff --git a/doc/conf.py b/doc/conf.py new file mode 100644 index 0000000..37d87c2 --- /dev/null +++ b/doc/conf.py @@ -0,0 +1,255 @@ +# -*- coding: utf-8 -*- +# +# macroeco documentation build configuration file, created by +# sphinx-quickstart on Sun Feb 16 21:19:54 2014. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +sys.path.insert(0, os.path.abspath('..')) +sys.path.insert(0, os.path.abspath('_ext/numpydoc')) + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.mathjax', + 'sphinx.ext.autosummary', 'numpydoc', 'sphinx.ext.intersphinx'] + +autosummary_generate = True +#autodoc_default_flags = ['inherited-members'] + +intersphinx_mapping = {'scipy': ('http://docs.scipy.org/doc/scipy/reference/', + None)} + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'macroeco' +copyright = u'2013-2014, Justin Kitzes and Mark Wilber' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.3' +# The full version, including alpha/beta/rc tags. +release = '0.3' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build', '_ext', '_templates'] + +# The reST default role (used for this markup: `text`) to use for all documents. +default_role = 'py:obj' + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. 
+#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'scipy' + +# Add any paths that contain custom themes here, relative to this directory. +html_theme_path = ['_theme'] + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +html_theme_options = { + "edit_link": False, + "rootlinks": [], + "sidebar": "right", + "scipy_org_logo": True, + "navigation_links": True, +} +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +html_sidebars = {'**': ['globaltoc.html', 'searchbox.html']} +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'macroecodoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). 
+latex_documents = [ + ('index', 'macroeco.tex', u'macroeco Documentation', + u'Justin Kitzes and Mark Wilber', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'macroeco', u'macroeco Documentation', + [u'Justin Kitzes and Mark Wilber'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------------ + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'macroeco', u'macroeco Documentation', + u'Justin Kitzes and Mark Wilber', 'macroeco', + 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' diff --git a/doc/distributions2.rst b/doc/distributions2.rst new file mode 100644 index 0000000..be356a6 --- /dev/null +++ b/doc/distributions2.rst @@ -0,0 +1 @@ +.. automodule:: macroeco.distributions2 diff --git a/doc/empirical.rst b/doc/empirical.rst new file mode 100644 index 0000000..44fef13 --- /dev/null +++ b/doc/empirical.rst @@ -0,0 +1,12 @@ +.. currentmodule:: macroeco.empirical + +Empirical +============ + +Some description. + +.. autosummary:: + :toctree: generated/ + + Patch + z diff --git a/doc/index.rst b/doc/index.rst new file mode 100644 index 0000000..3828de6 --- /dev/null +++ b/doc/index.rst @@ -0,0 +1,23 @@ +.. macroeco documentation master file, created by + sphinx-quickstart on Sun Feb 16 21:19:54 2014. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Macroeco: Ecological pattern analysis in Python +=============================================== + +Welcome to macroeco. + +.. toctree:: + :maxdepth: 2 + + empirical + distributions2 + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`search` + diff --git a/doc/make.bat b/doc/make.bat new file mode 100644 index 0000000..9dd6c3f --- /dev/null +++ b/doc/make.bat @@ -0,0 +1,190 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=_build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . +set I18NSPHINXOPTS=%SPHINXOPTS% . +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^` where ^ is one of + echo. 
html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\macroeco.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\macroeco.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. 
The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. + goto end +) + +:end From 72deccabc9cddc83b51c812410cc5e898200313f Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 25 Feb 2014 11:03:56 -0800 Subject: [PATCH 029/343] Draft of new distributions2 module with tests --- macroeco/distributions2.py | 525 ++++++++++++++++++++++++++ macroeco/tests/test_distributions2.py | 150 ++++++++ 2 files changed, 675 insertions(+) create mode 100644 macroeco/distributions2.py create mode 100644 macroeco/tests/test_distributions2.py diff --git a/macroeco/distributions2.py b/macroeco/distributions2.py new file mode 100644 index 0000000..6cfe7a6 --- /dev/null +++ b/macroeco/distributions2.py @@ -0,0 +1,525 @@ +""" +============================================== +Distributions (:mod:`macroeco.distributions2`) +============================================== + +This module contains distributions commonly used in analysis of ecological +patterns. At present, all distributions here are univariate. + +Most of these distributions are subclasses of `~scipy.stats.rv_continuous` and +`~scipy.stats.rv_discrete` found in `scipy.stats`. Additionally, several of the +distribution classes here are simple wrappers for existing distributions found +in `scipy.stats` that are updated to allow the use of common ecological +parameterizations. + +Continouous distributions +========================= + +.. autosummary:: + :toctree: generated/ + + expon + expon_uptrunc + +Discrete distributions +====================== + +.. autosummary:: + :toctree: generated/ + + geom + geom_uptrunc + nbinom + +.. DV: + Our public-facing distributions do not use location and scale parameters, as + they are not common in quantitative ecology. +""" + +from __future__ import division + +from decimal import Decimal +import numpy as np +import numpy.random as nprand + +from scipy.misc.doccer import inherit_docstring_from +from scipy.stats.distributions import (rv_discrete, rv_continuous, docdict, + docdict_discrete, docheaders) +import scipy.stats.distributions as spdist +import scipy.optimize as optim +import scipy.special as spec + + +_doc_param_note = \ +"""There are many available methods of `%(name)s`, each of which require one or +more of the parameters listed below. 
+""" + +_doc_custom_methods = \ +"""fit2(data, %(shapes)s) + MLE estimates of shapes given initial guesses (use instead of `fit`).""" + +_doc_discrete_custom_methods = \ +"""translate_args(uargs) + Get shape parameters from user-friendly args. +fit2(data, %(shapes)s) + MLE estimates of shapes given initial guesses.""" + +# Remove header from all methods +_docdict_allmeth_sh = docdict['allmethods'][16:] +_docdict_discrete_allmeth_sh = docdict_discrete['allmethods'][17:] + +# **kwds in expect string followed by no space was throwing warning +_docdict_allmeth_sh = _docdict_allmeth_sh.replace(', **kwds','') + +docdict['before_notes'] = ''.join([_doc_param_note, + docheaders['methods'], + _doc_custom_methods, + _docdict_allmeth_sh, + docdict['callparams']]) + +docdict_discrete['before_notes'] = ''.join([_doc_param_note, + docheaders['methods'], + _doc_discrete_custom_methods, + _docdict_discrete_allmeth_sh, + docdict_discrete['callparams']]) + + + +class rv_continuous_meco(rv_continuous): + """ + A modified generic continuous random variable class meant for subclassing. + + This class inherits from the `rv_continuous` class of `scipy.stats` and + contains all of its functionality. See the docstring of `rv_continuous` for + information on usage and subclassing. In addition, this class adds one new + methods. + + Methods + ------- + fit2 + calls method `fit` with fixed loc=0 and scale=1 (defaults) + + """ + + def fit2(self, *args): + """ + Return MLEs for shape parameters from data. + + Parameters + ---------- + data : array_like + Data to use in calculating the MLEs. + args : floats + Starting value(s) for shape parameters. Some may be held constant + (see Notes). + + Returns + ------- + tuple of floats + MLEs for shape parameters + + Notes + ----- + """ + + return self.fit(*args, floc=0, fscale=1)[:-2] + + +class rv_discrete_meco(rv_discrete): + """ + A modified generic discrete random variable class meant for subclassing. + + This class inherits from the `rv_discrete` class of `scipy.stats` and + contains all of its functionality. See the docstring of `rv_discrete` for + information on usage and subclassing. In addition, this class adds two new + methods. + + Methods + ------- + translate_args + takes user-friendly params as input and returns shape params + fit + estimates distribution params from data + + """ + + def translate_args(self, *args): + """ + Translates user-friendly arguments into shape parameters + + See distribution docstring for description of user arguments and shape + parameters. + + Parameters + ---------- + uargs : floats + User argument(s), usually easily measured and specified + + Returns + ------- + tuple of floats + Shape parameter(s) of distribution + + Notes + ----- + """ + + raise NotImplementedError, ("translate_args method not implemented " + "for this distribution") + + + def fit2(self, *args): + """ + Return MLEs for shape parameters from data. + + Parameters + ---------- + data : array_like + Data to use in calculating the MLEs. + args : floats + Subset of shape parameters that are not fit. See Notes. + + Returns + ------- + tuple of floats + MLEs for shape parameters + + Notes + ----- + """ + + raise NotImplementedError, ("fit method not implemented for this " + "distribution") + + +# +# Discrete +# + +class geom_gen(rv_discrete_meco): + r""" + A geometric discrete random variable. + + This implementation of the geometric distribution differs from that in + `scipy.stats`, as the distribution here has support from 0 to inf. + + .. 
math:: + \mathrm{pmf(x)} = (1-p)^{x} p + + for ``x >= 0``. The location parameter ``loc`` is not used. + + %(before_notes)s + uargs : float + distribution mean + + """ + + @inherit_docstring_from(rv_discrete_meco) + def translate_args(self, mu): + return 1 / (np.array(mu) + 1) + + @inherit_docstring_from(rv_discrete_meco) + def fit2(self, data): + """%(super)s + Requires one argument containing data to fit. + """ + return self.translate_args(np.mean(data)), + + def _argcheck(self, p): + return (p <= 1) & (p >= 0) + + def _pmf(self, x, p): + return (1-p)**x * p + + def _logpmf(self, x, p): + return k*np.log(1-p) + log(p) + + def _cdf(self, x, p): + x = np.floor(x) + return (1.0-(1.0-p)**(x+1)) + + def _stats(self, p): + mu = (1.0 - p) / p + var = (1.0 - p) / p**2 + return mu, var, None, None + +geom = geom_gen(name='geom', shapes='p') + + +class geom_uptrunc_gen(rv_discrete_meco): + r""" + An upper-truncated geometric discrete random variable. + + .. math:: + + \mathrm{pmf(x)} = \frac{(1-p)^{x} p}{1 - (1-p)^{b+1}} + + for ``x >= 0``. + + `geom_uptrunc` takes two shape parameters: ``p`` and ``b``, the upper + limit. The location parameter ``loc`` is not used. + + %(before_notes)s + + uargs : float + distribution mean, upper limit + + Notes + ----- + The boundary ``p = 1`` is a special case in which the ratio between + successive terms of the distribution is 1 (i.e., the pmf is uniform). This + arises when the mean of the distribution is precisely one-half the upper + limit. + + This distribution is known as the Pi distribution in the MaxEnt Theory of + Ecology [#]_, where the ``p`` parameter is known as ``exp(-lambda)``. + + References + ---------- + .. [#] + Harte, J. (2011). Maximum Entropy and Ecology: A Theory of + Abundance, Distribution, and Energetics (p. 264). Oxford, United + Kingdom: Oxford University Press. + + .. + DEV: There is a difficult implicit equation needed to determine the p + parameter from the mu and b arguments. We've employed the brentq solver + here but note that it fails regularly for certain shape combinations. + + """ + + # TODO: Should add a warning for b < 5 or 10 or so (p solver gives erratic + # answers. + + @inherit_docstring_from(rv_discrete_meco) + def translate_args(self, mu, b): + return _geom_solve_p_from_mu_vect(mu, b), b + + @inherit_docstring_from(rv_discrete_meco) + def fit2(self, data, b): + """%(super)s + Requires two arguments consisting of data to fit and ``b``, the upper + limit of the distribution (held constant). + """ + # Take mean of data as MLE of distribution mean, then calculate p + mu = np.mean(data) + return _geom_solve_p_from_mu_vect(mu, b), b + + def _argcheck(self, p, b): + # Unlike the traditional geometric, p can be > 0 + return (p >= 0) + + def _pmf(self, x, p, b): + pmf = (1.0-p)**x * p / (1.0-(1.0-p)**(b+1)) + pmf[x > b] = 0 + return pmf + + def _cdf(self, x, p, b): + k = np.floor(x) + cdf = (1.0-(1.0-p)**(x+1)) / (1.0-(1.0-p)**(b+1)) + cdf[x > b] = 1 + return cdf + + def _stats(self, p, b): + mu = (p / (1 - p)) - ((b + 1) / (p**-b - 1)) + return mu, None, None, None + +geom_uptrunc = geom_uptrunc_gen(name='geom_uptrunc', shapes='p, b') + +def _geom_solve_p_from_mu(mu, b): + """ + For the geom_uptrunc, given mu and b, return p. + Ref: Harte 2011, Oxford U Press. Eq. 7.50. 
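    A minimal usage sketch: the public ``geom_uptrunc.translate_args`` method
    wraps this solver, and the expected value below mirrors the accompanying
    tests (Harte 2011, Tab. 7.4, n0 = 32, mean = 4).

        >>> p, b = geom_uptrunc.translate_args(4, 32)
        >>> # p is approximately 0.801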
+ """ + + def p_eq(p, mu, b): + p, mu, b = Decimal(p), Decimal(mu), Decimal(b) + return ( (p / (1 - p)) - ((b + 1) / (p**-b - 1)) - mu ) + + return optim.brentq(p_eq, 1e-9, 20, args=(mu, b), disp=True) + +_geom_solve_p_from_mu_vect = np.vectorize(_geom_solve_p_from_mu) + + +class nbinom_gen(spdist.nbinom_gen): + r""" + A negative binomial discrete random variable. + + This implementation of the geometric distribution differs from that in + `scipy.stats`, as the distribution here uses the more common ecological + parameterization. + + .. math:: + + \mathrm{pmf(x)} = + \frac{\Gamma (k + x)}{\Gamma(k) x!} \left(\frac{k}{k+\mu}\right)^k + \left(\frac{\mu}{k+\mu}\right)^x + + for ``x >= 0``. In the traditional parameterization, ``n = k`` (the size + parameter) and ``p = k / (k + mu)``. The location parameter ``loc`` is not + used. + + %(before_notes)s + + """ + + @inherit_docstring_from(rv_discrete_meco) + def fit2(self, x, k_range=(0.1,100,0.1)): + """%(super)s + Requires one argument containing data to fit. A keyword argument + k_range contains a tuple of the start, stop, and step values to search + for k. Default is ``k_range=(0.1,100,0.1)``. + + This method recognizes that the MLE of the mu parameter is simply equal + to the mean of the data. A brute force search is then used to find the + parameter k. + + """ + assert len(x) > 20, "nbinom fit is not stable with <20 data points" + mu = np.mean(x) + return mu, _nbinom_solve_k_from_mu(x, mu, k_range) + + def _get_p_from_mu(self, mu, k): + return k / (k + mu) + + def _rvs(self, mu, k): + p = self._get_p_from_mu(mu, k) + return nprand.negative_binomial(k, p, self._size) + + def _argcheck(self, mu, k): + p = self._get_p_from_mu(mu, k) + return (k >= 0) & (p >= 0) & (p <= 1) + + def _pmf(self, x, mu, k): + p = self._get_p_from_mu(mu, k) + return np.exp(self._logpmf(x, mu, k)) + + def _logpmf(self, x, mu, k): + p = self._get_p_from_mu(mu, k) + coeff = spec.gammaln(k+x) - spec.gammaln(x+1) - spec.gammaln(k) + return coeff + k*np.log(p) + x*np.log(1-p) + + def _cdf(self, x, mu, k): + p = self._get_p_from_mu(mu, k) + x = np.floor(x) + return spec.betainc(k, x+1, p) + + def _stats(self, mu, k): + p = self._get_p_from_mu(mu, k) + Q = 1.0 / p + P = Q - 1.0 + mu = k*P + var = k*P*Q + g1 = (Q+P)/np.sqrt(k*P*Q) + g2 = (1.0 + 6*P*Q) / (k*P*Q) + return mu, var, g1, g2 + +nbinom = nbinom_gen(name='nbinom', shapes='mu, k') + +def _nbinom_solve_k_from_mu(x, mu, k_range): + """ + For the nbinom, given mu, return k from searching some k_range. + """ + + # TODO: See if a root finder like fminbound would work with Decimal used in + # logpmf method (will this work with arrays?) + + def nll(x, mu, k): + return -np.sum(nbinom._logpmf(x, mu, k)) + + k_array = np.arange(*k_range) + nll_array = np.zeros(len(k_array)) + + for i in range(len(k_array)): + nll_array[i] = nll(x, mu, k_array[i]) + + min_nll_idx = np.argmin(nll_array) + + return k_array[min_nll_idx] + +# +# Continuous +# + +class expon_gen(rv_continuous_meco): + r""" + An exponential continuous random variable. + + .. math:: + + \mathrm{pdf(x)} = \lambda e^{-\lambda x} + + for ``x >= 0``. 
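    As a quick check of this parameterization (using the ``expon`` instance
    defined below), the density at zero equals ``lam`` and the mean is
    ``1 / lam``:

        >>> expon.pdf(0, 2)   # lam * exp(0) = 2.0
        >>> expon.mean(2)     # 1 / lam = 0.5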
+ + %(before_notes)s + + """ + + def _rvs(self, lam): + return nprand.exponential(1/lam, self._size) + + def _pdf(self, x, lam): + return lam * np.exp(-lam*x) + + def _cdf(self, x, lam): + return 1 - np.exp(-lam*x) + + def _entropy(self, lam): + return 1 - np.ln(lam) + + def _stats(self, lam): + return lam**-1, lam**-2, 2, 6 + +expon = expon_gen(a=0.0, name='expon', shapes='lam') + + +class expon_uptrunc_gen(rv_continuous_meco): + r""" + An upper-truncated exponential continuous random variable. + + .. math:: + + \mathrm{pdf(x)} = \frac{\lambda e^{-\lambda x}}{1 - e^{-\lambda x}} + + for ``b >= x >= 0``. + + %(before_notes)s + + """ + + # Internally, class works by creating a new expon_gen object with the + # appropriate upper limit and calling its methods. + + # TODO: Do all of these broadcast correctly, or should we call _pdf, etc.? + + def _rvs(self, lam, b): + expon = expon_gen(a=0.0, b=b) + return expon.rvs(lam) + + def _pdf(self, x, lam, b): + expon = expon_gen(a=0.0, b=b) + return expon.pdf(x, lam) + + def _cdf(self, x, lam, b): + expon = expon_gen(a=0.0, b=b) + return expon.cdf(x, lam) + + def _entropy(self, lam, b): + expon = expon_gen(a=0.0, b=b) + return expon.entropy(lam) + + def _stats(self, lam, b): + expon = expon_gen(a=0.0, b=b) + return expon.stats(lam) + + @inherit_docstring_from(rv_discrete_meco) + def fit2(self, data, lam, b): + expon = expon_gen(a=0.0, b=b) + return expon.fit(data, lam, floc=0, fscale=1), b + +expon_uptrunc = expon_uptrunc_gen(a=0.0, name='expon_uptrunc', shapes='lam, b') + + + + diff --git a/macroeco/tests/test_distributions2.py b/macroeco/tests/test_distributions2.py new file mode 100644 index 0000000..9b3accb --- /dev/null +++ b/macroeco/tests/test_distributions2.py @@ -0,0 +1,150 @@ +""" +Tests for distributions2 module + +""" + +from __future__ import division + +from numpy.testing import (TestCase, assert_equal, assert_array_equal, + assert_almost_equal, assert_array_almost_equal, + assert_allclose, assert_, assert_raises) + +import numpy as np +from decimal import Decimal +import macroeco.distributions2 as dist2 +from macroeco.distributions2 import * +import matplotlib.pyplot as plt + +class TestGeom(TestCase): + + def test_pmf(self): + vals = geom.pmf([0,1,2], 0.5) + assert_array_almost_equal(vals, [0.5,0.25,0.125]) + + def test_mean(self): + mu1 = geom.mean(0.5) + assert_almost_equal(mu1, 1) + + mu2 = geom.mean(0.25) + assert_almost_equal(mu2, 3) + + def test_cdf(self): + vals = geom.cdf([0,1,2], 0.5) + assert_array_almost_equal(vals, [0.5,0.75,0.875]) + + def test_translate_args(self): + ps = geom.translate_args([10, 20]) + assert_array_almost_equal(ps, [1/11, 1/21]) + + def test_fit2(self): + p = geom.fit2([1,2,4,5]) + assert_almost_equal(p, 0.25) + + +class TestGeomUptrunc(TestCase): + + def test_pmf(self): + # Expected values are regular geo cdf divided by cdf at b + vals = geom_uptrunc.pmf([0,1,2], 0.5, 2) + assert_array_almost_equal(vals, np.array([0.5,0.25,0.125])/0.875) + + def test_cdf(self): + # Expected values are regular geom cdf divided by cdf at b + vals = geom_uptrunc.cdf([0,1,2], 0.5, 2) + assert_array_almost_equal(vals, np.array([0.5,0.75,0.875])/0.875) + + def test_mean(self): + mu1 = geom_uptrunc.mean(0.801, 32) + assert_almost_equal(mu1, 4, decimal=2) + + def test_translate_args_harte_16(self): + # TODO: The Harte figures appear to be inaccurate, generate better + # canonical test case for next two tests and for test_fit2 and + # test_mean + + # From Harte 2011, Oxford U Press, Tab 7.4, n0=16 row, Eq 7.50 + b = 16 + mu = 
np.array([2, 1]) # A0/8, A0/16 + expected = np.array([0.669, 0.500]) + ps, _ = geom_uptrunc.translate_args(mu, b) + assert_almost_equal(ps, expected, decimal=3) + + def test_translate_args_harte_32(self): + # From Harte 2011, Oxford U Press, Tab 7.4, n0=32 row, Eq 7.50 + b = 32 + mu = np.array([4, 2]) # A0/8, A0/16 + expected = np.array([0.801, 0.667]) + ps, _ = geom_uptrunc.translate_args(mu, b) + assert_almost_equal(ps, expected, decimal=3) + + def test_translate_args_mqwilber_hand_calc(self): + # TODO: Confirm last 4 of tests, which more accurate + b = np.array([60, 340, 34]) + mu = np.array([60*.1, 340*.6, 34*.9]) + expected = np.array([.8572, 1.0036, 1.2937]) + ps, _ = geom_uptrunc.translate_args(mu, b) + assert_almost_equal(ps, expected, decimal=3) + + def test_translate_args_with_sum_of_pmf(self): + p1, b1 = geom_uptrunc.translate_args(341/4, 341) # Issue 33 + assert_array_almost_equal(1,np.sum(geom_uptrunc.pmf(range(101),p1,b1))) + + p2, b2 = geom_uptrunc.translate_args(120, 200) # Arbitrary + assert_array_almost_equal(1,np.sum(geom_uptrunc.pmf(range(101),p2,b2))) + + def test_fit2(self): + p1, _ = geom_uptrunc.fit2([0,10], 10) + assert_almost_equal(p1, 1) + + p2, _ = geom_uptrunc.fit2([1,3], 16) + assert_almost_equal(p2, 0.669, decimal=3) + + +class TestNbinom(TestCase): + + def test_pmf(self): + #> dnbinom(c(0,1,2), 3, mu=5) + #[1] 0.05273438 0.09887695 0.12359619 + vals = nbinom.pmf([0,1,2], 5, 3) + assert_array_almost_equal(vals, [0.05273438, 0.09887695, 0.12359619]) + + def test_cdf(self): + #> pnbinom(c(0,1,2),2,mu=30) + #[1] 0.00390625 0.01123047 0.02153015 + vals = nbinom.cdf([0,1,2], 30, 2) + assert_array_almost_equal(vals, [0.00390625, 0.01123047, 0.02153015]) + + def test_mean_var(self): + mu1, var1 = nbinom.stats(20, 2, moments='mv') + assert_array_almost_equal([mu1, var1], [20, 20+(20**2)/2]) + + def test_get_p_from_mu(self): + assert_almost_equal(nbinom._get_p_from_mu(10, 2), 2/12) + + def test_fit2_with_rvs(self): + np.random.seed(8) + x = nbinom.rvs(20, 10, size=100) + mu, k = nbinom.fit2(x) + assert_array_almost_equal([mu, k], [20, 10], decimal=0) + + def test_fit2_with_R(self): + #> library(MASS) + #> fitdistr(seq(49), "negative binomial") + x = np.array(range(1,50)) + mu, k = nbinom.fit2(x) + assert_array_almost_equal([mu, k], [25, 2.4337345], decimal=1) + + def test_fit2_with_manual_calc(self): + x = np.array([6,17,14,12,8,10,4,9,3,12,4,2,12,8,14,16,9,10,8,5,6]) + mu, k = nbinom.fit2(x, k_range=(0.01,10,0.01)) + assert_array_almost_equal([mu, k], [9, 8.54], decimal=2) + + + +class TestExpon(TestCase): + pass + + +class TestExponUptrunc(TestCase): + pass + From c769a3c3c82c0de6eb72ad53d3713f1fec7b9024 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 25 Feb 2014 11:32:09 -0800 Subject: [PATCH 030/343] Remove accidentally committed pyc files From e9b5dbf3340eb276ea82cc7bb2d37395d6145fbd Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 25 Feb 2014 15:19:28 -0800 Subject: [PATCH 031/343] Add gh-pages routine to Sphinx makefile --- doc/Makefile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/Makefile b/doc/Makefile index 34e16a5..8c4003f 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -15,7 +15,7 @@ ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
-.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext gh-pages help: @echo "Please use \`make ' where is one of" @@ -152,3 +152,9 @@ doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." + +gh-pages: + rm -rf ../../macroeco-docs/* + make html + cp -r _build/html/* ../../macroeco-docs/ + cd ../../macroeco-docs; git checkout gh-pages && touch .nojekyll && git add -A && git ci -m "Generated gh-pages for macroeco commit `git --git-dir=../macroeco/.git log --pretty=format:'%h' -n 1`" && git push origin gh-pages \ No newline at end of file From 19d0c8dd3f2d6b47765111aca6e50320186ea1ce Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 26 Feb 2014 10:19:29 -0800 Subject: [PATCH 032/343] Update Makefile to build in this repo gh-pages branch --- doc/Makefile | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/doc/Makefile b/doc/Makefile index 8c4003f..e4230b9 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -154,7 +154,17 @@ doctest: "results in $(BUILDDIR)/doctest/output.txt." gh-pages: - rm -rf ../../macroeco-docs/* + +gh-pages: + git checkout gh-pages + rm -rf * ../macroeco ../*.* ../_* + cd ../; git checkout develop doc macroeco make html - cp -r _build/html/* ../../macroeco-docs/ - cd ../../macroeco-docs; git checkout gh-pages && touch .nojekyll && git add -A && git ci -m "Generated gh-pages for macroeco commit `git --git-dir=../macroeco/.git log --pretty=format:'%h' -n 1`" && git push origin gh-pages \ No newline at end of file + cp -r _build/html/* ../ + rm -rf _build generated ../macroeco + touch ../.nojekyll + git add --all :/ + git commit -m "Generated gh-pages for `git log develop -1 --pretty=short --abbrev-commit`" && git push origin gh-pages + rm -rf * ../*.* ../_* + git checkout develop + git checkout -- . 
\ No newline at end of file From 4d7ded94cc86f7b7aaf85a81bb975e7619784414 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 26 Feb 2014 11:30:49 -0800 Subject: [PATCH 033/343] Remove accidentally committed DS_Store files --- doc/_templates/.DS_Store | Bin 6148 -> 0 bytes doc/_templates/autosummary/.DS_Store | Bin 6148 -> 0 bytes 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 doc/_templates/.DS_Store delete mode 100644 doc/_templates/autosummary/.DS_Store diff --git a/doc/_templates/.DS_Store b/doc/_templates/.DS_Store deleted file mode 100644 index 8baf407a092f545d912839263f5fe9d08090386c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%}N6?5Kh{vy9&C3g5GlT(!+{B;K`-dlNVvd9#nLfEq0-9q`Pa8TG*q%kKj@C zk@RhxNm5a&HxZSYF!?5vnWXtrk_}^w*YaqQF^4f`fg+Y{s6G+wM;(%q@gQ6Nl*uXk zz;DT}p9NW-)`HF+wzh+;6&ia-%J!Qbgu8LGd+5w;$RrEmq}fr)anyv6{YIQbvRjjR z5~cgTb~fTDYpFqv!?vItIaO+s&b@8TFD$KZh4SL^>iXvP z?%`2$&f)K=WZK{Wp1}BG`DcD7Y0KmoA_m!mEJ9*{7$64zI0NQbvnKxdI_T#U1H{1h zGl1uV07di+78=#j0Tq5eVt)k@1#EmvAPR$?!9pYSfN-4(s8hLlVsM=feqrJ~gM~(& z&bXQx#xXN<^FrZjcJK=o&bX(MYGQyG_{cz6HyynHH@|-We@vnlF+dFbD+YL_?$>Lu zBy+b8Ee`Km0eS?Af^mh$w-hkcRSdCs70-bx0lz>4&@)(Q1P=&(2q+q;A_jhxfmc6D BTw4GD diff --git a/doc/_templates/autosummary/.DS_Store b/doc/_templates/autosummary/.DS_Store deleted file mode 100644 index 5008ddfcf53c02e82d7eee2e57c38e5672ef89f6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 Date: Mon, 3 Mar 2014 15:32:29 -0800 Subject: [PATCH 034/343] Start framework for new main controller --- macroeco/compare.py | 1591 +------------------ macroeco/distributions2.py | 79 +- macroeco/empirical.py | 33 +- macroeco/main.py | 523 ++++++ macroeco/{tests => }/test_distributions2.py | 0 5 files changed, 643 insertions(+), 1583 deletions(-) create mode 100644 macroeco/main.py rename macroeco/{tests => }/test_distributions2.py (100%) diff --git a/macroeco/compare.py b/macroeco/compare.py index 8f62980..fbf9ecd 100644 --- a/macroeco/compare.py +++ b/macroeco/compare.py @@ -1,1575 +1,46 @@ -#!/usr/bin/python +""" +=========================== +Main (:mod:`macroeco.main`) +=========================== -''' This module contains classes and functions for comparing empirical and -predicted macroecological metrics. +This module contains functions that compare the goodness of fit of a +distribution/curve to data or the fit of two distributions/curves to each +other. -Classes -------- -CompareDistribution : Base class to for CompareSAD, CompareSSAD, CompareIED, -CompareSED, CompareASED +.. autosummary:: + :toctree: generated/ -CompareSAD : Compares predicted species abundance distributions (SAD) with -empirical SADs + main -CompareSSAD : Compares predicted species-level spatial abundance distributions -(SSAD) with empirical SSADS - -CompareSAR : Compares predicted species-area relationship (SAR) curves with -empirical SAR curves - -CompareIED : Compares predicted individual energy distributions (IED) with -empirical IEDs - -CompareSED : Compares predicted species-level energy distributions (SED) with -empirical SEDs - -CompareASED : Compares predicted average species-level energy distributions -(ASED) with empirical ASEDs. 
- -Functions ---------- --`empirical_cdf` -- Empirical cdf for given data --`aic` -- Calculate AIC value --`aicc` -- Calculate corectted AIC value --`aic_wieghts` -- Calculate AIC weights for models --`ks_two_sample_test` -- Kolmogrov-Smirnov two sample test --`likelihood_ratio` -- Calculated likelihood ratio for nested models --`variance` -- Calculates the variance for given datasets --`skew` -- Calculates the skew for given datasets --`kurtosis` -- Calculates the kurtosis for given data sets --`bootstrap` -- Get bootstrapped samples from a dataset -- `bootstrap_moment` -- Gives a BS confidence interval for a comparison of - first three moments of two distributions --`'mean_squared_error` -- Calculates the MSE between an obs and pred data set - - -''' +""" from __future__ import division + import numpy as np import scipy.stats as stats -from distributions import * -import copy -import random -import time -import logging - - -class CompareDistribution(object): - ''' - Comparison object compares a list of data to any number of distributions - - ''' - - #TODO: Error Checking - def __init__(self, data_list, dist_list, observed_index): - ''' - Parameters - ---------- - data_list : list of iterables or list of tuples of iterables - data_list is any list of iterables or list of tuples of iterables - that will be passed to the fit functions of the distribution - objects in dist_list. data_list will be passed to fit functions for - each distribution. data_list undergoes no validation in __init__ - dist_list : list - List of distribution objects or strings that have the same name as - a distribution object. If they are strings, they will be evaled - observed_index : int - The index of the desired observed metric in the tuples within - data_list. If 0, data_list can be a list of data - rather than a list of tuples of data. The index specified by - object_ind will be considered the observed data. - - Notes - ----- - All distribution objects are fit in the __init__ method. - - ''' - - self.dist_list = make_dist_list(dist_list) - - # Fit the distributions objects - [dist.fit(data_list) for dist in self.dist_list] - - # Set the observed data - if observed_index == 0 and np.all([type(dt) != type((1,)) for dt in - data_list]): - self.observed_data = [np.array(dt) for dt in data_list] - elif np.all([type(dt) == type((1,)) for dt in data_list]): - self.observed_data = [np.array(dt[observed_index]) for dt in - data_list] - else: - self.observed_data = [np.array(dt) for dt in data_list] - - # Set this in __init__ so other methods can check if compare_rads() has - # been called - self.rads = None - self.cdfs = None - - # If attributes have not been instantiated, set to None - try: - self.sad_spp_list - except: - self.sad_spp_list = None - try: - self.criteria - except: - self.criteria = None - - def compare_mse(self, mse_base='cdf'): - ''' - This function compares the mean squared error (mse) for each distribution - against the observed data, self.observed_data. Perfect predicted data - would yield a mse of 0. The lower the mse the better the predicted - values fit the data. If mse_base='cdf' the mse is calculated from the - cdf. If mse_base='rad', the mse is calculated from the rank_abundance - distribution. - - Parameters - ----------- - mse_base : str - Either 'cdf' or 'rad'. If 'cdf' the mse values are computed - from the cumulative density function. It 'rad' the mse values are - computed from the rank abundance distribution. 
Default is 'cdf' - - Returns - ------- - : dict - A dictionary of length self.dist_list with keywords being the - distribution names. Each keyword looks up a list of length - self.observed_data in which are the mse values comparing that - distribution's predicted values (cdf or rad) to the corresponding - observed values. - - Notes - ----- - Calculating the mse from the cdf is the least bias approximater - - ''' - if mse_base == 'cdf': - if self.cdfs == None: - vals = self.compare_cdfs() - else: - vals = self.cdfs - elif mse_base == 'rad': - if self.rads == None: - vals = self.compare_rads() - else: - vals = self.rads - else: - raise NameError('%s value for mse_base not recognized' % mse_base) - - - mse = {} - for kw in vals.iterkeys(): - if kw != 'observed': - if not np.all([len(j) == 0 for j in vals[kw]]): - mse[kw] = [mean_squared_error(vals['observed'][i], - vals[kw][i]) for i in xrange(len(vals[kw]))] - else: - logging.warning('MSE values for %s set to NaN' % kw) - mse[kw] = [np.NaN for i in xrange(len(self.observed_data))] - return mse - - - def compare_aic(self, crt=False): - ''' - Get the aic or aicc values for every data set and for every - distribution - - Parameters - ---------- - crt : bool - If True, calculates the corrected AIC for the given data. If False, - calculates AIC. - - Returns - ------- - : list - A list of arrays. The list has length = to number of data sets in - self.observed_data. Each array within list has the length of - self.dist_list. The first element of the array corresponds to the - first distribution in dist_list, the second corresponds to the - second distribution, etc. - - ''' - aic_vals = [] - for dist in self.dist_list: - - try: - nlls = nll(dist.pmf(self.observed_data)) - except NotImplementedError: - try: - nlls = nll(dist.pdf(self.observed_data)) - except NotImplementedError: - logging.warning('%s has neither a PMF nor a PDF. AIC set' - % get_name(dist) + ' to infinity') - nlls = np.repeat(np.inf, len(self.observed_data)) - - #NOTE: dist.par_num is the number of parameters of distribution - k = np.repeat(dist.par_num, len(nlls)) - if crt: - obs = np.array([len(data) for data in self.observed_data]) - aic_vals.append(aicc(nlls, k, obs)) - else: - aic_vals.append(aic(nlls, k)) - return list(np.array(aic_vals).T) - - def compare_aic_measures(self, crt=False): - ''' - Compare AIC weights, delta_AIC, and AIC values across the different - models. Output is a three item tuple where each item is a list of - arrays with each array having length equal to the number of models - proposed and the length of the list is the length of self.observed_data. - See Returns for tuple description. - - Parameters - ---------- - crt : bool - If True, calculates the corrected AIC weights for the given data. - If False, calculates AIC weights. - - Returns - ------- - : tuple - The first element is a list of arrays with each array having length - equal to the number of models proposed and the length of the list - is the length of self.observed_data. The first element contains - the AIC weights. The second element is the delta AIC values in the - same format as the first tuple object. The third object are the AIC - values in the same format as the output of the compare_aic method. - - Notes - ----- - The given AIC values in each array correspond to the distributions in - self.dist_list. 
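        A minimal sketch (the distribution names are illustrative; any
        objects or names from `distributions` may be passed):

            >>> comp = CompareSAD([sad1, sad2], ['nbd', 'nbd_lt'])
            >>> aic_w, delta_aic, aic_vals = comp.compare_aic_measures(crt=True)

        Each array in aic_w holds one weight per distribution for the
        corresponding observed data set.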
- - ''' - aic_vals = self.compare_aic(crt=crt) - aic_wghts = []; delta_aic = [] - for mods_aic in aic_vals: - taic_wghts, tdelta_aic = aic_weights(mods_aic) - aic_wghts.append(taic_wghts) - delta_aic.append(tdelta_aic) - return aic_wghts, delta_aic, aic_vals - - def compare_rads(self): - ''' - Compares rank abundance distributions for all data in data_list and to - the given distributions - - Returns - ------- - : dict - Has len(self.dist_list) + 1. All the distribution class names - passed to the constructor are key words as well as 'observed' which - references the observed data, self.observed_data. Each keyword looks up - a list of arrays. Each list is len(self.observed_data) long and - contains the predicted rads for the empirical data sets for the - given distribution. - - Note - ---- - If self.rads has already been set in another method (i.e. is not None). - This method will not overwrite it. To reset self.rads, set self.rads - = None and then run self.compare_rads(). - - ''' - if self.rads == None: - rads_dict = {} - rads_dict['observed'] = copy.deepcopy(self.observed_data) - for i, dist in enumerate(self.dist_list): - #Different Identifier? - rads_dict[get_name(dist)] = dist.rad() - - self.rads = rads_dict - return self.rads - - def compare_cdfs(self): - ''' - Compares cdfs for all data in data_lists and to the empirical cdfs - - Returns - ------- - :dict - Has len(self.dist_list) + 1. All the distribution class names - passed to the constructor are key words as well 'observed' which - references the observed data, self.observed_data. Each keyword looks up - a list of arrays. Each list is len(self.observed_data) long and - contains the predicted cdfs for the empirical data sets for the - given distribution. - - - ''' - if self.cdfs == None: - - cdfs_dict = {} - cdfs_dict['observed'] = [empirical_cdf(data) for data in - self.observed_data] - for i, dist in enumerate(self.dist_list): - try: - cdfs_dict[get_name(dist)] = dist.cdf(self.observed_data) - except NotImplementedError: - logging.warning('CDF method not implemented for %s' % - get_name(dist)) - cdfs_dict[get_name(dist)] = [np.array([]) for i in - xrange(len(self.observed_data))] - - self.cdfs = cdfs_dict - return self.cdfs - - - def compare_LRT(self, null_mdl): - ''' - Performs a likelihood ratio test (LRT) on the distributions with in - self.dist_list with the parameter nll_mdl as the null model. While this - function will generate output on non-nested models, the models must be - nested for the output to be meaningful. - - Parameters - ---------- - null_mdl : distribution object - The null distribution object to use in the LRT. - - Returns - ------- - : dict - A dictionary with keywords 'null_model, alternative model.' Each - keyword references a list of length len(self.observed_data) which - contains tuples that contain the output of the function - likelihood_ratio (chisquared, p-value). The LRT is performed on - each data set in self.observed_data for each given model pair. 
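        For example, given a comparison object ``comp`` and an
        already-constructed distribution object ``null_dist`` (an
        illustrative name) that is nested within the alternatives in
        self.dist_list:

            >>> lrt = comp.compare_LRT(null_dist)

        The keys of ``lrt`` take the form 'null_name, alt_name' and each
        value is a list of (G^2, p-value) tuples, one per observed data set.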
- - ''' - LRT_list = {} - null_mdl.fit(self.observed_data) - - try: - null_nlls = nll(null_mdl.pmf(self.observed_data)) - except: - null_nlls = nll(null_mdl.pdf(self.observed_data)) - for i, dist in enumerate(self.dist_list): - - try: - alt_nlls = nll(dist.pmf(self.observed_data)) - except: - alt_nlls = nll(dist.pdf(self.observed_data)) - - k = dist.par_num - null_mdl.par_num - df = np.repeat(k, len(alt_nlls)) - lrt = likelihood_ratio(null_nlls, alt_nlls, df) - comp_kw = get_name(null_mdl) + ", " + get_name(dist) - LRT_list[comp_kw] = lrt - return LRT_list - - def compare_rarity(self, mins_list): - ''' - This method takes in the output from self.compare_rads and a list of - minimum values against which to compare the observed and predicted - rads. and outputs a dictionary with length self.dist_list + 1 (all - distributions + observed). Each keyword in this dict looks up a dict - of len(mins_list) where the keywords are the values against which the - rads will be <=. Each one of these sub-dictionaries looks up a list - with len(self.observed_data). - - Parameters - ---------- - mins_list : array-like object - A list of numbers. Each number number will be used in the - following function: rad <= mins_list[i]. - - Returns - ------- - : dict - Returns a dictionary with length self.dist_list + 1 (all - distributions + observed). Each keyword in this dict looks up a dict - of len(mins_list) where the keywords are the values against which the - rads will be <=. Each one of these sub-dictionaries looks up a list - with len(self.observed_data). - - - ''' - - # Don't remake rads if they have already been made - if self.rads == None: - rads = self.compare_rads() - else: - rads = self.rads - - mins_list = make_array(mins_list) - - rarity = {} - keys = list(rads.viewkeys()) - for kw in keys: - rarity[kw] = {} - for mins in mins_list: - rarity[kw][mins] = [sum(data <= mins) for data in rads[kw]] - return rarity - - def compare_moments(self): - ''' - Compare the higher order moments (variance, skew, kurtosis) for the - given distributions and observed data. - - Returns - ------- - : dict - A dictionary with keywords variance, skew, and kurtosis. Each - keyword looks up a dictionary len(dist_list) + 1 keywords. The - keywords are 'observed' and the distribution object names. Each of - these keywords looks up a list of floats with the same length as - data_list. - - ''' - - if self.rads == None: - rads = self.compare_rads() - else: - rads = self.rads - - var = {} - skw = {} - kurt = {} - - for kw in rads.iterkeys(): - var[kw] = variance(rads[kw]) - skw[kw] = skew(rads[kw]) - kurt[kw] = kurtosis(rads[kw]) - moments = {} - moments['variance'] = var - moments['skew'] = skw - moments['kurtosis'] = kurt - - return moments - - def summary(self, mins_list=[10], crt=False): - ''' - Summarizes the given datasets and the predicted rads. Looks at - total balls sampled ('balls'), number of urns ('urns'), the max balls - in a given urn ('max'), number of urns with less than MIN balls ('tot - <= MIN'), and the fit of the distributions in self.dist_list to the - data in self.observed_data - - 'balls' is the sum of the observed data. For a Species Abundance - Distribution 'balls' would represent individuals. For an Individual - Energy Distribution 'balls' would represent energy. - - 'urns' is the length of the observed data. For a Species Abundance - Distribution 'urns' would represent species and for a Individual Energy - Distribution 'urns' would represent individuals. 
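        For example, the observed SAD [5, 3, 1, 1] has urns = 4 (species),
        balls = 10 (individuals), max = 5, and, with mins_list=[1], a
        tot_min count of 2 (the two singletons).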
- - Parameters - ---------- - mins_list : list - Bins with balls less than or equal to 10 - crt : bool - If True, corrected AIC, if False, not - - Returns - ------- - : dict - Dictionary of dictionaries of length self.dist_list + 1. Each - sub-dictionary other than 'observed' contains the keywords balls, - urns, max, tot_min, aic, aic_d, aic_w, and par_num. Each of these - keywords contains a list that is the same length as the number of - sads under consideration. - - - urns = total number of items in self.observed_data. Could be - species (SAD, ASED), cells (SSAD), or individuals (IED, SED) - balls = Items that are placed in urns. Could be individuals (SAD, - SSAD), energy (ASED, IED, SED). - max = Maximum number of balls in an urn - tot_min = Total number of urns with with <= a given number of balls - aic = AIC - aic_d = Delta AIC - aic_w = AIC weights - par_num = Parameter number of the given distribution - tot_min = total counts less than or equal numbers in min_list - vars = Additional variables computed for the given distribution - - - ''' - summary = {} - - # Check that rads is already set, if not set it - if self.rads == None: - rads = self.compare_rads() - if type(rads) == type((1,)): - rads = rads[0] - else: - rads = self.rads - - rarity = self.compare_rarity(mins_list=mins_list) - for kw in rads.iterkeys(): - summary[kw] = {} - summary[kw]['balls'] = [np.sum(data) for data in rads[kw]] - summary[kw]['urns'] = [len(data) for data in rads[kw]] - summary[kw]['max'] = [np.max(data) for data in rads[kw]] - summary[kw]['tot_min'] = rarity[kw] - - aic_vals = self.compare_aic_measures(crt=crt) - names = [get_name(dist) for dist in self.dist_list] - for i, nm in enumerate(names): - summary[nm]['aic'] = list(np.array(aic_vals[2]).T)[i] - summary[nm]['aic_d'] = list(np.array(aic_vals[1]).T)[i] - summary[nm]['aic_w'] = list(np.array(aic_vals[0]).T)[i] - summary[nm]['par_num'] = np.repeat(self.dist_list[i].par_num, - len(list(np.array(aic_vals[2]).T)[i])) - summary[nm]['vars'] = self.dist_list[i].var - - return summary - -class CompareSAD(CompareDistribution): - ''' - Object inherits CompareDistribution and uses it to compare species - abundance distributions (SAD) - - Attributes - ---------- - self.observed_data : A list of arrays - Each array in this list is an SAD. Each of these SADs will be compared - to the distributions in self.dist_list - self.dist_list : a list of distribution objects - Each object is a distribution object to which the SADs in - self.observed_data will be compared. - self.criteria : a list of dictionaries or None - If not None, each dictionary specifies the divisions made on the plot - that generated each SAD in self.observed_data. self.criteria should be - the same length as self.observed_data - self.sad_spp_list : list of arrays or None - If not None, each array contains the species strings for the - corresponding SAD in self.observed_data. The length of - self.sad_spp_list should be the same length as self.observed_data and - the length of any array within self.sad_spp_list should be the same - length the corresponding array in self.observed_data. The index of any - species name within any array within self.sad_spp_list references the - species count with the same index in self.observed_data. 
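    A minimal construction sketch (the distribution name is illustrative):

        >>> comp = CompareSAD([np.array([5, 3, 1, 1])], ['nbd_lt'])
        >>> rads = comp.compare_rads()  # keys: 'observed' plus one per distribution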
- - ''' - - def __init__(self, data_list, dist_list, patch=False): - ''' - Parameters - ---------- - data_list : list of iterables or output from Patch.sad - List of np.arrays containing data - dist_list : list - List of distribution objects or strings that have the same name as - a distribution object. If they are strings, they will be evaled - patch : bool - If True, expects the output from the Patch.sad method and if False, - expects a list of iterables. Presumably, each iterable is an SAD. - - Notes - ----- - If data_list is a list of tuples containing iterables, the 1st entry - (0th element) in each tuple is considered the observed SADs - ''' - if patch == True: - self.criteria, sad_data, self.sad_spp_list = unpack(data_list) - super(CompareSAD, self).__init__(sad_data, dist_list, 0) - else: - super(CompareSAD, self).__init__(data_list, dist_list, 0) - -class CompareSSAD(CompareDistribution): - ''' - Object inherits CompareDistribution and uses it to compare species-level - spatial abundance distributions (SSAD) - - Attributes - ---------- - self.observed_data : A list of arrays - Each array in this list is an SSAD. Each of these SSADs will be - compared to the distributions in dist_list - self.dist_list : a list of distribution objects - Each object is a distribution object to which the SSADs in - self.observed_data will be compared. - self.criteria : a list of dictionaries or None - If not None, each dictionary specifies the divisions made on the plot - that generated each SAD in self.observed_data. self.criteria should be - the same length as self.observed_data - self.sad_spp_list : List of strings or None - If not None, self.sad_spp_list is a list of strings where each string - refers to a species. The length of self.sad_spp_list should be the same - length as self.observed_data. Each species string has the same index - within the list as its corresponding SSAD in self.observed_data. - - ''' - - def __init__(self, data_list, dist_list, patch=False): - ''' - Parameters - ---------- - data_list : list of iterables or output from Patch.ssad - List of np.arrays containing data - dist_list : list - List of distribution objects or strings that have the same name as - a distribution object. If they are strings, they will be evaled - patch : bool - If True, expects the output from the Patch.sad method and if False, - expects a list of iterables. Presumably, each iterable is an SSAD. - - - Notes - ----- - If data_list is a list of tuples containing iterables, the 1st entry - (0th element) in each tuple is considered the observed SSADs - ''' - if patch == True: - - self.sad_spp_list = list(data_list[1].viewkeys()) - ssad_data = [np.array(data_list[1][nm]) for nm in - self.sad_spp_list] - self.criteria = data_list[0] - - super(CompareSSAD, self).__init__(ssad_data, dist_list, 0) - else: - super(CompareSSAD, self).__init__(data_list, dist_list, 0) - - - -class CompareIED(CompareDistribution): - ''' - Class compares predicted individual energy distributions (IED) for the - entire community to observed IEDs - - Attributes - ---------- - self.observed_data : list of arrays - Observed individual energy distributions (IED) - self.ied_spp_lists : list of arrays - Each array contains species strings which pair to the values - contained in the corresponding array in self.ied_list. The length of - self.ied_spp_lists should be the same length as self.ied_list. - self.sad_spp_list : list of arrays - If not None, each array contains the species strings for the - corresponding SAD in self.sad_list. 
The length of self.sad_spp_list - should be the same length as self.sad_list and the length of any array - within self.sad_spp_list should be the same length the corresponding - array in self.sad_list. The index of any species name within any array - within self.sad_spp_list references the species count with the same - index in self.sad_list. - self.criteria : a list of dictionaries or None - If not None, each dictionary specifies the divisions made on the plot - that generated each SAD and IED in self.sad_list and self.ied_list. - self.criteria should be the same length as self.sad_list and - self.ied_list. - self.dist_list : a list of distribution objects - Each object is a distribution to which the IEDs in self.ied_list will - be compared. - - ''' - - def __init__(self, data_list, dist_list, patch=False): - ''' - Parameters - ---------- - data_list : list of tuples or output from Patch object - A list containing tuples of length two. The first object in a - tuple an iterable containing the community individual energy - distribution. The second object in a tuple is an iterable - containing the empirical species abundance distribution. - See patch argument for more information. - dist_list : list of strings or objects - Each string corresponds to a name of a psi distribution to which to - compare to the observed data. - patch: bool - If True, expects a tuple of length 2 with the first object being - the output from Patch.ied and the second element being the - output from Patch.sad. If False expects what argument data_list - describes. sads and energy should be made with the same criteria. - - Notes - ----- - The __init__ method always removes zeros from the SADs - - If data_list is a list of tuples containing iterables, the 1st entry - (0th element) in each tuple is considered the observed IEDs - ''' - - if patch == True: - # Unpack sad. Store spp_lists in items - sad_criteria, sad_list, self.sad_spp_list = \ - unpack(data_list[1]) - - # Unpack ied - ied_criteria, ied_list, self.ied_spp_lists = \ - unpack(data_list[0]) - self.criteria = sad_criteria - - super(CompareIED, self).__init__(zip(ied_list, sad_list), - dist_list, 0) - - else: - super(CompareIED, self).__init__(data_list, dist_list, 0) - self.ied_spp_lists = None - - - -class CompareSED(CompareDistribution): - ''' - Class compares predicted species-level energy distribution(s) with the - observed species-level energy distribution(s) - - Attributes - ---------- - self.observed_data : list of iterables - Observed species energy distributions (SED) - self.criteria : a list of dictionaries or None - If not None, each dictionary specifies the divisions made on the plot - that generated each SED, IED, and SAD and IED in self.sed_list, - self.ied_list, and self.sad_list. All self.criteria should have the - same length. - self.dist_list : a list of distribution objects - Each object is a distribution to which the IEDs in self.ied_list will - be compared. - self.sad_spp_list : list of strings or None - If not None, each string in self.spp_names is a species ID which - corresponds to an array in self.sed_list. - - ''' - - def __init__(self, data_list, dist_list, patch=False): - ''' - Parameters - ---------- - data_list : list of tuples or output from Patch object - A list of tuple where each tuple has length 3. The first object in - a tuple is an iterable containing the empirical species energy - distribution. The second object is a tuple is a community - individual energy distribution. 
The third object in a tuple is an - empirical species abundance distribution. - dist_list : list of strings or objects - Each string corresponds to a name of a psi distribution to which to - compare to the observed data. - patch : bool - If True, expects a tuple of length 3 with the first object being - the complete output from Patch.sed, the second object being the - output from Patch.ied and the third element being the output from - Patch.sad. If False expects what argument data_list describes. - Empirical sads and energy distributions should be made with the - same criteria (See Patch class for criteria explanation). - - Notes - ----- - If data_list is a list of tuples containing iterables, the 1st entry - (0th element) in each tuple is considered the observed SEDs. - ''' - - if patch: - # TODO: Check length of input objects! - - if not ((len(data_list[0]) == len(data_list[1])) and\ - (len(data_list[1]) == len(data_list[2]))): - raise IndexError('SED, IED, and SAD patch returns' +\ - ' must have the same length. Use the same criteria for' +\ - ' each.') - - #Sort species energy - sed_criteria = [] - sed_list = [] - spp_names = [] - for obj in data_list[0]: - spp = list(obj[1].viewkeys()); spp.sort() - spp_names.append(spp) - for kw in spp_names[-1]: - sed_list.append(obj[1][kw]) - sed_criteria.append(obj[0]) - - #Sort community energy - ied_criteria = [] - ied_list = [] - for i, obj in enumerate(data_list[1]): - - # For consistency I am copying the ied data for each species - num = len(spp_names[i]) - tcri = [obj[0] for i in xrange(num)] - ied_criteria += tcri - teng = [obj[1] for i in xrange(num)] - ied_list += teng - - #Sort sad - sad_criteria = [] - sad_list = [] - for i, obj in enumerate(data_list[2]): - - # Copy sad data for each species - num = len(spp_names[i]) - tcri = [obj[0] for i in xrange(num)] - sad_criteria += tcri - tsad = [obj[1] for i in xrange(num)] - sad_list += tsad - - self.sad_spp_list = [] - for i in xrange(len(spp_names)): - self.sad_spp_list += spp_names[i] - self.criteria = sad_criteria - - super(CompareSED, self).__init__(zip(sed_list, ied_list, sad_list), - dist_list, 0) - - else: - - super(CompareSED, self).__init__(data_list, dist_list, 0) - - def compare_rads(self, return_spp=False): - ''' - Comparison of species level energy distributions rank abundance - distributions. - - Parameters - ---------- - return_spp : bool - If True, the returns a tuple with a species list as the second - element. - Returns - ------- - : dict - Has len(self.dist_list) + 1. All the distribution class names - passed to the constructor are key words as well as 'observed' which - references the observed data. Each keyword looks up - a list of arrays. Each list is len(self.ied_list) long and - contains the predicted reds for the empirical data sets for the - given distribution. - : list or None - Returns self.sad_spp_list which could be a list of lists or None. - These names are the species names that correspond numerically with - the arrays in within each distribution. Only returned if - return_spp=True. - - ''' - if return_spp: - return super(CompareSED, self).compare_rads(), self.sad_spp_list - else: - return super(CompareSED, self).compare_rads() - - - def compare_cdfs(self, return_spp=False): - ''' - Comparison of species level energy distributions cdfs - - Parameters - ---------- - return_spp : bool - If True, the returns a tuple with a species list as the second - element. - - Returns - ------- - : dict - Has len(self.dist_list) + 1. 
All the distribution class names - passed to the constructor are key words as well as 'observed' which - references the observed data. Each keyword looks up - a list of arrays. Each list is len(self.ied_list) long and - contains the predicted reds for the empirical data sets for the - given distribution. - : list or None - Returns self.sad_spp_list which could be a list of lists or None. - These names are the species names that correspond numerically with - the arrays within each distribution. Only returned if - return_spp=True. - - ''' - if return_spp: - return super(CompareSED, self).compare_cdfs(), self.sad_spp_list - else: - return super(CompareSED, self).compare_cdfs() - -class CompareASED(CompareDistribution): - ''' - Compares theoretical and observed ased's - - Attributes - ---------- - self.observed_data : list of arrays - Observed average species energy distributions (ASED) - self.sad_spp_list : list of arrays - If not None, each array contains the species strings for the - corresponding SAD in self.sad_list. The length of self.sad_spp_list - should be the same length as self.sad_list and the length of any array - within self.sad_spp_list should be the same length the corresponding array - in self.sad_list. The index of any species name within any array - within self.sad_spp_list references the species count with the same - index in self.sad_list. - self.criteria : a list of dictionaries or None - If not None, each dictionary specifies the divisions made on the plot - that generated each SAD and IED in self.sad_list and self.ied_list. - self.criteria should be the same length as self.sad_list and - self.ied_list. - self.dist_list : a list of distribution objects - Each object is a distribution to which the IEDs in self.ied_list will - be compared. - - ''' - - def __init__(self, data_list, dist_list, patch=False): - ''' - Parameters - ---------- - data_list : list of tuples or output from Patch object - A list containing tuples of length three. The first object in the - tuple is an iterable containing the average energy distribution. - The second object in a tuple an iterable containing the community - individual energy distribution. The third object in a tuple is an - iterable containing the empirical species abundance - distribution.See patch argument in this method for information - about Patch object output. - dist_list : list of strings or objects - Each string corresponds to a name of a ased distribution to which to - compare to the observed data. - patch : bool - If True, expects a tuple of length 3 with the first object being - the complete output from Patch.ased, the second object being - the output from Patch.ied and the third element being the - output from Patch.sad. If False expects what argument data_list - describes. Empirical sads and energy distributions should be made - with the same criteria. - - Notes - ----- - If data_list is a list of tuples containing iterables, the 1st entry - (0th element) in each tuple is considered the observed ASEDs. - ''' - - if patch: - - # Unpack sad. 
Store spp_lists in items - sad_criteria, sad_list, sad_spp_lists = \ - unpack(data_list[2]) - - # Unpack ased - ased_criteria, ased_list, ased_species = \ - unpack(data_list[0]) - - # Unpack ied - ied_criteria, ied_list, ied_spp = unpack(data_list[1]) - - self.criteria = sad_criteria - self.sad_spp_list = ased_species - - super(CompareASED, self).__init__(zip(ased_list, ied_list, - sad_list), dist_list, 0) - - - else: - super(CompareASED, self).__init__(data_list, dist_list, 0) - -class CompareSAR(object): - ''' - Object allows comparison between species-area relationships - - Attributes - ---------- - self.sar_list : list of arrays - A list of arrays in which each array is the number of species a - given areas. The areas are specified in self.a_list and correspond - exactly self.sar_list. - self.a_list : list of arrays - A list of arrays in which each array is the area (or area fraction) at - which the number of species specified in self.sar_list are found. - Indices correspond exactly with self.sar_list. - self.full_sad : list of arrays - A list of species abundance distributions (SAD) computed at the anchor - scale for each given SAR. The length of self.full_sad should equal the - length of self.sar_list and self.a_list. - self.curve_list : list of objects - A list of SAR curve objects to which the empirical SARs in - self.sar_list will be compared. - - ''' - - def __init__(self, sar_list, curve_list, full_sad, max_a=True, - patch=False): - ''' - Parameters - ---------- - sar_list : list of tuples or list of outputs from Patch().sar - A list of tuples where each tuple contains two array-like objects - of the same length. The first element in the tuple is the - area list and the second element is the species count for the sar. - The maximum area in the area list should be the anchor area from - which the full_sad was generated. If patch=True, accepts the - output from Patch.sar - curve_list : list - A list of SARCurve objects or list of SARCurve object names (str) - full_sad : list of array-like objects - List of complete sads. Each sad corresponds to an element in - sar_list. - max_a : bool - If max_a is True, compare sets all areas to fractions in area_list. - patch : bool - If True, sar_list should be a list of outputs from Patch().sar - ''' - - assert len(sar_list) == len(full_sad), "sar_list and full_sad must " \ - + " be the same length" - self.sar_list = [] - self.a_list = [] - if patch: - for sar_obj in sar_list: - unzipped_sar = unpack(sar_obj[0]) - self.sar_list.append(np.array(unzipped_sar[0])) - self.a_list.append(np.array(unzipped_sar[1])) - else: - unzipped_sar = unpack(sar_list) - self.a_list = [np.array(areas) for areas in unzipped_sar[0]] - self.sar_list = [np.array(sar) for sar in unzipped_sar[1]] - - # Set to area fractions if max_a is true - if max_a: - self.a_list = [ars / np.max(ars) for ars in self.a_list] - - self.full_sad = [np.array(sad) for sad in full_sad] - - self.curve_list = make_dist_list(curve_list) - - - def compare_curves(self, iter_vals=False, use_rad=False, form='sar'): - ''' - Method generates predicted SAR curves from the given observed data and - curve objects for comparison - - Parameters - ---------- - use_rad : bool - If False, uses the sad pmf to calculate the SAR. If True, uses the - sad rank abundance distribution to calculate the SAR. - iter_val : bool - If True, uses the iterative method to calculate SAR. If False uses - the one shot method. - form : string - Default value is 'sar' which calculates the SAR given the - parameters. 
You can also use 'ear' which calculates the EAR with - the given parameters. - - Returns - ------- - : list of dicts - The list is the same length self.sar_list and each dictionary is - the length of self.curve_list + 1. Each keyword in a dictionary - references either the observed SAR ('observed') or the SAR generate by - one of the curve objects. - - Notes - ----- - If possible, the SARs are computed using an iterative method. - Otherwise, they are calculated with a one-shot method. - ''' - pred_sar = [] - for sar, a, sad in zip(self.sar_list, self.a_list, self.full_sad): - psar = {} - psar['observed'] = np.array(zip(sar, a), dtype=[('items', np.float), - ('area', np.float)]) - for cur in self.curve_list: - cur.fit(sad, (a, sar)) - - if iter_vals: - try: - psar[cur.get_name()] = cur.iter_vals(a, - use_rad=use_rad, form=form) - except AttributeError: - psar[cur.get_name()] = cur.iter_vals(a, use_rad=True, - form=form) - else: - try: - psar[cur.get_name()] = cur.vals(a, use_rad=use_rad, - form=form) - except AttributeError: - psar[cur.get_name()] = cur.vals(a, use_rad=True, - form=form) - - for kw in psar.iterkeys(): - psar[kw].sort(order='area') - pred_sar.append(psar) - return pred_sar - -def nll(pdist): - ''' - Parameters - ---------- - pdist : list of arrays - List of pmf values on which to compute the negative log-likelihood - - Returns - ------- - :list - List of nll values - - ''' - return [-sum(np.log(dist)) for dist in pdist] - - - -def empirical_cdf(emp_data): - ''' - Generates an empirical cdf from empirical data - - Parameters - ---------- - emp_data : array-like object - Empirical data - - Returns - -------- - :ndarray - An empirical cdf - ''' - - emp_data = cnvrt_to_arrays(emp_data)[0] - unq_vals = np.unique(emp_data) - leng = len(emp_data) - cdf = np.empty(len(emp_data)) - count = 0 - for i in unq_vals: - loc = np.where((i == emp_data))[0] - count += len(loc) - cdf[loc] = count / leng - return cdf - -def aic(neg_L, k, loglik=True): - ''' - Calculates the AIC of a given model - - Parameters - ---------- - neg_L : array-like object - The negative log likelihood of the models or a list of pdfs/pmfs, - depending on nll - k : array-like object - The number of parameters of the model - loglik : bool - If True, assumes neg_L is an array-like object of negative log - likelihood. If False, assumes neg_L is a list of pdfs/pmfs. - - Returns - ------- - : float - AIC for a given model - ''' - if loglik: - neg_L, k = cnvrt_to_arrays(neg_L, k) - else: - neg_L = nll(neg_L) - neg_L, k = cnvrt_to_arrays(neg_L, k) - - assert len(k) == len(neg_L), "neg_L and k must have the same length" - aic = (2 * neg_L) + (2 * k) - return aic - -def aicc(neg_L, k, n=None, loglik=True): - ''' - Calculates the corrected AIC of a given model - - Parameters - ---------- - neg_L : array-like object - The negative log likelihood of models or list of pdfs/pmfs - k : array-like object - The number of parameters of models - n : array-like object - Number of observations for each model. Can be left as None if neg_L is - list of pdfs/pmfs and loglik = True - loglik : bool - If True, assumes neg_L is a array-like object of negative log - likelihood. If False, assumes neg_L is a list of pdfs/pmfs. 
- - Returns - ------- - : np.array - AICc for a given models - - ''' - if loglik: - assert n != None, 'n argument must be given if loglik is True' - neg_L, k, n = cnvrt_to_arrays(neg_L, k, n) - else: - n = np.array([len(tneg_L) for tneg_L in neg_L]) - neg_L = nll(neg_L) - neg_L, k = cnvrt_to_arrays(neg_L, k) - - assert len(neg_L) == len(k) and len(neg_L) == len(n) and len(k) == len(n),\ - "neg_L, k, and n must all have the same length" - aic_value = aic(neg_L, k) - return aic_value + ((2 * k * (k + 1)) / (n - k - 1)) - -def aic_weights(aic_values): - ''' - Calculates the aic_weights for a given set of models - - Parameters - ---------- - aic_values : array-like object - Array-like object containing AIC values from different models - - Returns - ------- - : tuple - First element contains the relative AIC weights, second element - contains the delta AIC values. - - Notes - ----- - AIC weights can be interpreted as the probability that a given model is the - best model in comparison to the other models - - ''' - aic_values = cnvrt_to_arrays(aic_values)[0] - aic_values = np.array(aic_values) - minimum = np.min(aic_values) - delta = np.array([x - minimum for x in aic_values]) - values = np.exp(-delta / 2) - weights = np.array([x / sum(values) for x in values]) - return weights, delta - -def ks_two_sample(data1, data2): - '''Function uses the Kolomogrov-Smirnov two-sample test to determine if the - two samples come from the same distribution. Note that the KS-test is only - valid for continuous distributions - - Parameters - ---------- - data1 : array-like object - Array-like object which contains a set of data to compare - data2 : array-like object - Array-like object which contains a set of data to compare - - Returns - ------- - : tuple - (D-statistic, two-sided p-value) - - ''' - data1, data2 = cnvrt_to_arrays(data1, data2) - data1 = np.array(data1) - data2 = np.array(data2) - return stats.ks_2samp(data1, data2) - -def likelihood_ratio(nll_null, nll_alt, df_list): - ''' - This functions compares of two nested models using the likelihood ratio - test. - - Parameters - ---------- - nll_null : array-like object - The negative log-likelihood of the null model - nll_alt : array-like object - The negative log-likelihood of the alternative model - df_list : array-like object - the degrees of freedom calculated as (number of free parameters in - alternative model) - (number of free parameters in null model) - - Returns - ------- - : list of tuples - (test_statistic, p-value) - - Notes - ----- - The LRT only applies to nested models. The variable test_stat is known as - the G^2 statistic. 
- ''' - - nll_null, nll_alt, df_list = cnvrt_to_arrays(nll_null, nll_alt, df_list) - assert len(nll_null) == len(nll_alt) and len(nll_null) == len(df_list) and\ - len(nll_alt) == len(df_list), "nll_null, nll_alt, and df_list " + \ - "must have the same length" - # Calculate G^2 statistic - ll_null = nll_null * -1; ll_alt = nll_alt * -1 - test_stat = 2 * (ll_null - ll_alt) - return [(ts, stats.chisqprob(ts, df)) for ts, df in zip(test_stat, df_list)] - -def variance(data_sets): - '''Calculates the variance of the given data_sets - - Parameters - ---------- - data_sets : list - A list of np.arrays on which the kurtosis will be calculated - - ''' - - variance_list = [] - for data in data_sets: - variance_list.append(np.var(data, ddof=1)) - - return variance_list - -def skew(data_sets): - '''Calculates the skew of some given data - - Parameters - ---------- - data_sets : list - A list of np.arrays on which the kurtosis will be calculated - - Returns - ------- - : list - A list of kurtosis values with the same length as data_sets - - ''' - - skewness_list = [] - for data in data_sets: - skewness_list.append(stats.skew(data)) - - return skewness_list - -def kurtosis(data_sets): - '''Calculates the kurtosis using an online algorithm for the given list of - datasets - - Parameters - ---------- - data_sets : list - A list of np.arrays on which the kurtosis will be calculated - - Returns - ------- - : list - A list of kurtosis values with the same length as data_sets - - ''' - kurtosis_list = [] - for data in data_sets: - kurtosis_list.append(stats.kurtosis(data)) - - return kurtosis_list - -def bootstrap(data_sets, num_samp=1000): - '''Bootstrap a data_set within data_sets num_samp times. With replacement - - Parameters - ---------- - data_sets : list - A list of np.arrays on which the kurtosis will be calculated - num_samp : int - Number of bootstrap samples to take - - Returns - ------- - : a list - A list of lists of arrays. Each list contains num_samp bootstrapped - arrays - ''' - - random.seed(time.time()) - - bootstraps = [] - for data in data_sets: - bt_data = [] - n = len(data) - for j in xrange(num_samp): - bt_data.append(np.array([random.choice(data) for j in xrange(n)])) - bootstraps.append(bt_data) - - return bootstraps - -def bootstrap_moment(data1, data2, moment, CI=.95, num_samp=1000): - ''' - A bootstrap two-sample test of a moment. Returns the test_statistic - distribution and the confidence interval as specified by parameter CI. The - confidence interval is the difference of the moment from data1 minus the - moment from data2. - - Parameters - ---------- - data1 : array-like object - An array like object containing data - data2 : array-like object - An array-like object containing data - moment : list - List of strings (mean, skew, kurtosis, and/or variance). - Will calculate the bootstrap CI's for all the moments in the list - CI : float - The desired confidence interval - num_samp : int - Number of bootstrap samples - - Returns - ------- - res : dict - A dictionary with key words equivalent to the strings found in moment. - Each keyword looks up tuple with two elements. The first element is - the observed difference between the moment of data1 and the moment of - data2. The second element is a tuple containing the confidence - interval (lower_bound, upper_bound) on the difference between the - specified moment of data1 and data2. 
- - Notes - ----- - From the returned confidence interval, one is CI confident that the - returned confidence interval contains the true difference between the - moment of data1 and data2. Therefore, if the confidence interval does not - contain 0 you can be CI confident that the moments are different. - - Bootstrapping in typically only appropriate for sample sizes >= 25. - - - ''' - - data1 = np.array(data1) - data2 = np.array(data2) - - # Bootstrap the data - data1_boot = bootstrap([data1], num_samp=num_samp)[0] - data2_boot = bootstrap([data2], num_samp=num_samp)[0] - - def calc_ci(stat1, stat2): - """ Calculate CI """ - - diff = stat1 - stat2 - lci = (1 - CI) / 2. - uci = 1 - lci - ci = (stats.scoreatpercentile(diff, 100 * lci),\ - stats.scoreatpercentile(diff, 100 * uci)) - return ci - - - res = {} - # Set the higher order moment - if 'skew' in moment: - - stat_1 = np.array(skew(data1_boot)) - stat_2 = np.array(skew(data2_boot)) - - stat_dist = skew([data1])[0] - skew([data2])[0] - ci = calc_ci(stat_1, stat_2) - - res['skew'] = (stat_dist, ci) - - if 'variance' in moment: - stat_1 = np.array(variance(data1_boot)) - stat_2 = np.array(variance(data2_boot)) - - stat_dist = variance([data1])[0] - variance([data2])[0] - ci = calc_ci(stat_1, stat_2) - - res['variance'] = (stat_dist, ci) - - if 'kurtosis' in moment: - stat_1 = np.array(kurtosis(data1_boot)) - stat_2 = np.array(kurtosis(data2_boot)) - - stat_dist = kurtosis([data1])[0] - kurtosis([data2])[0] - ci = calc_ci(stat_1, stat_2) - - res['kurtosis'] = (stat_dist, ci) - - if "mean" in moment: - stat_1 = np.array([np.mean(bs) for bs in data1_boot]) - stat_2 = np.array([np.mean(bs) for bs in data2_boot]) - - stat_dist = np.mean(data1) - np.mean(data2) - ci = calc_ci(stat_1, stat_2) - - res['mean'] = (stat_dist, ci) - - return res - -def mean_squared_error(obs, pred, divide_by_n=True): - ''' - Calculates the mean squared error between observed and predicted data sets. - The data sets must be of the same length - - Parameters - ---------- - obs : array-like object - The observed data - pred : array-like object - The predicted data - divide_by_n : bool - If True, returns mean squared error. If False returns sum of squares - error. - - Returns - ------- - : float - The mean squared error - ''' - - if len(obs) != len(pred): - raise ValueError('obs and pred parameters must have the same length') - - obs, pred = cnvrt_to_arrays(obs, pred) - - if divide_by_n: - return sum((pred - obs)**2) / len(obs) - else: - return sum((pred - obs)**2) - - -def cnvrt_to_arrays(*args): - ''' - Converts all args to np.arrays - ''' - arg_list = [] - for arg in args: - try: - len(arg); arg = np.array(arg) - except: - arg = np.array([arg]) - arg_list.append(arg) - return tuple(arg_list) - -def get_name(obj): - ''' - Return the name of the object - ''' - return obj.__class__.__name__ - -def make_dist_list(dist_list): - ''' - If the dist_list is all strings, eval them. 
Else return as is - ''' - - if np.all([type(dist) == str for dist in dist_list]): - - ret_dist_list = np.empty(len(dist_list), dtype=object) - - for i, dist_obj in enumerate(dist_list): +from distributions import * - # Clean strings - dist_obj = dist_obj.strip() - try: - ret_dist_list[i] = eval(dist_obj + '()') - except: - # Do this if passing in a gen_sar sad and ssad - # Assumes the sad and ssad are separated by '-' - try: - sad, ssad = tuple(dist_obj.split('-')) - if sad.find('(') != 1 and sad.find(')') != -1: - sad_obj = eval(sad.strip()) - else: - sad_obj = eval(sad.strip() + '()') - if ssad.find('(') != 1 and ssad.find(')') != -1: - ssad_obj = eval(ssad.strip()) - else: - ssad_obj = eval(ssad.strip() + '()') - ret_dist_list[i] = gen_sar(sad_obj, ssad_obj) - except: - raise NameError("Could not evaluate '%s' as an object name" - % dist_obj + '. It may not exist or may be improperly' + - ' formatted. Please check your distribution list in ' - + 'your parameters.xml file or in the dist_list' + - " argument '%s'" % str(dist_list)) - ret_dist_list = list(ret_dist_list) - else: - ret_dist_list = dist_list +def get_AIC(values, params): + """ + Calculate AIC given values of a pdf/pmf and a set of model parameters. + """ + k = len(params) # Num parameters + L = get_nll(values) + return 2*k + 2*L - return ret_dist_list +def get_nll(values): + """ + Calculate negative log likelihood from an array of pdf/pmf values. + """ + return -np.sum(np.log(values)) -def unpack(zipped_data): - ''' - Unpacks zipped data +def get_empirical_cdf(data): - ''' + min, max = 0, np.ceil(np.max(data)) + x = np.arange(min, max+2) # x max is 1 above emp_result max + counts, _ = np.histogram(data, bins=x, normed=True) + emp_cdf = np.cumsum(counts) - unzipped_data = zip(*zipped_data) - unzipped_data = [list(tup) for tup in unzipped_data] - return tuple(unzipped_data) - + return x[:-1], emp_cdf diff --git a/macroeco/distributions2.py b/macroeco/distributions2.py index 6cfe7a6..73aba43 100644 --- a/macroeco/distributions2.py +++ b/macroeco/distributions2.py @@ -47,8 +47,7 @@ docdict_discrete, docheaders) import scipy.stats.distributions as spdist import scipy.optimize as optim -import scipy.special as spec - +import scipy.special as special _doc_param_note = \ """There are many available methods of `%(name)s`, each of which require one or @@ -97,11 +96,39 @@ class rv_continuous_meco(rv_continuous): Methods ------- + translate_args + takes user-friendly params as input and returns shape params + fit2 calls method `fit` with fixed loc=0 and scale=1 (defaults) """ + def translate_args(self, *args): + """ + Translates user-friendly arguments into shape parameters + + See distribution docstring for description of user arguments and shape + parameters. + + Parameters + ---------- + uargs : floats + User argument(s), usually easily measured and specified + + Returns + ------- + tuple of floats + Shape parameter(s) of distribution + + Notes + ----- + """ + + raise NotImplementedError, ("translate_args method not implemented " + "for this distribution") + + def fit2(self, *args): """ Return MLEs for shape parameters from data. @@ -297,7 +324,7 @@ def translate_args(self, mu, b): return _geom_solve_p_from_mu_vect(mu, b), b @inherit_docstring_from(rv_discrete_meco) - def fit2(self, data, b): + def fit2(self, data, b=None): """%(super)s Requires two arguments consisting of data to fit and ``b``, the upper limit of the distribution (held constant). 
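
For orientation, a usage sketch tying together pieces added above: the new compare helpers (get_nll, get_AIC, get_empirical_cdf) and the fit2 method on the geometric distribution. The abundance values are invented, and the import paths assume the package layout implied by main.py at this point in the series (compare and distributions2 importable from macroeco); treat this as an illustration under those assumptions, not as code from the patch.

import numpy as np
from macroeco.distributions2 import geom
from macroeco.compare import get_AIC, get_nll, get_empirical_cdf

abund = np.array([1, 1, 2, 3, 5, 8, 13])   # hypothetical abundance data

p, = geom.fit2(abund)                      # MLE of the geometric parameter
pmf_vals = geom.pmf(abund, p)              # pmf evaluated at the observed data

nll = get_nll(pmf_vals)                    # negative log-likelihood of the fit
aic = get_AIC(pmf_vals, (p,))              # AIC with one fitted parameter
x, ecdf = get_empirical_cdf(abund)         # stepwise empirical cdf on 0..max
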
@@ -316,7 +343,7 @@ def _pmf(self, x, p, b): return pmf def _cdf(self, x, p, b): - k = np.floor(x) + x = np.floor(x) cdf = (1.0-(1.0-p)**(x+1)) / (1.0-(1.0-p)**(b+1)) cdf[x > b] = 1 return cdf @@ -361,9 +388,15 @@ class nbinom_gen(spdist.nbinom_gen): used. %(before_notes)s + uargs : float + distribution mean and k parameter """ + @inherit_docstring_from(rv_discrete_meco) + def translate_args(self, mu, k): + return mu, k + @inherit_docstring_from(rv_discrete_meco) def fit2(self, x, k_range=(0.1,100,0.1)): """%(super)s @@ -397,13 +430,20 @@ def _pmf(self, x, mu, k): def _logpmf(self, x, mu, k): p = self._get_p_from_mu(mu, k) - coeff = spec.gammaln(k+x) - spec.gammaln(x+1) - spec.gammaln(k) + coeff = special.gammaln(k+x)-special.gammaln(x+1)-special.gammaln(k) return coeff + k*np.log(p) + x*np.log(1-p) def _cdf(self, x, mu, k): p = self._get_p_from_mu(mu, k) x = np.floor(x) - return spec.betainc(k, x+1, p) + return special.betainc(k, x+1, p) + + def _ppf(self, q, mu, k): + p = self._get_p_from_mu(mu, k) + vals = np.ceil(special.nbdtrik(q, k, p)) + vals1 = (vals-1).clip(0.0, np.inf) + temp = self._cdf(vals1, k, p) + return np.where(temp >= q, vals1, vals) def _stats(self, mu, k): p = self._get_p_from_mu(mu, k) @@ -453,9 +493,20 @@ class expon_gen(rv_continuous_meco): for ``x >= 0``. %(before_notes)s + uargs : float + distribution mean """ + @inherit_docstring_from(rv_continuous_meco) + def translate_args(self, mu): + return 1 / mu + + @inherit_docstring_from(rv_continuous_meco) + def fit2(self, data): + expon = expon_gen(a=0.0) + return 1/expon.fit(data, floc=0)[2], + def _rvs(self, lam): return nprand.exponential(1/lam, self._size) @@ -485,6 +536,8 @@ class expon_uptrunc_gen(rv_continuous_meco): for ``b >= x >= 0``. %(before_notes)s + uargs : float + distribution mean and upper limit """ @@ -493,6 +546,15 @@ class expon_uptrunc_gen(rv_continuous_meco): # TODO: Do all of these broadcast correctly, or should we call _pdf, etc.? + @inherit_docstring_from(rv_continuous_meco) + def translate_args(self, mu, b): + raise NotImplementedError, "Translation of mu to lam not implemented" + + @inherit_docstring_from(rv_continuous_meco) + def fit2(self, data, b=np.inf): + expon = expon_gen(a=0.0, b=b) + return expon.fit(data, floc=0)[2], b + def _rvs(self, lam, b): expon = expon_gen(a=0.0, b=b) return expon.rvs(lam) @@ -513,11 +575,6 @@ def _stats(self, lam, b): expon = expon_gen(a=0.0, b=b) return expon.stats(lam) - @inherit_docstring_from(rv_discrete_meco) - def fit2(self, data, lam, b): - expon = expon_gen(a=0.0, b=b) - return expon.fit(data, lam, floc=0, fscale=1), b - expon_uptrunc = expon_uptrunc_gen(a=0.0, name='expon_uptrunc', shapes='lam, b') diff --git a/macroeco/empirical.py b/macroeco/empirical.py index 5a0a58c..88b3b2b 100644 --- a/macroeco/empirical.py +++ b/macroeco/empirical.py @@ -181,22 +181,22 @@ def parse_criteria(self, criteria): return spp_list, spp_col, count_col, engy_col, mass_col, combinations - def sad(self, criteria, clean=False): + def sad(self, cols, splits, clean=False): ''' Calculates an empirical species abundance distribution given criteria. Parameters ---------- + cols : dict + Identifier with keys for columns to use for species ID (spp_col), + count (count_col), energy (energy_col), and mass (mass_col). Only + spp_col is mandatory. criteria : dict - Dictionary of form {column_name: value}. Must contain a key with a - value of 'species' indicating the column with species identifiers - (this column must be type categorical in metadata). 
If a column - giving the counts of species found at a point is also in the data, - a key with the value 'count' should also be given. - - Value has a different meaning depending on column type: - - metric - number of divisions of data along this axis, int/float - - categorical - 'split' calculates each category separately, + Keys for column names and value determining how to split column. + Value of 'split' divides into all unique values in column, + especially appropriate for categorical columns. Any other value is + evaluated as an integer giving the number of divisions of data + along this axis clean : bool If True, all the zeros are removed from the sads. If False, sads are left as is. @@ -211,13 +211,22 @@ def sad(self, criteria, clean=False): species in the same order as they appear in the second element of result. ''' + # TODO: Convert all methods to take cols separately + # TODO: Incorporate correct criteria syntax into parameters + # TODO: Ensure that all methods return a list of tuples where first + # element is comb and second is array of data that is the result - spp_list, spp_col, count_col, engy_col, mass, combinations = \ - self.parse_criteria(criteria) + # Define cols and spp_list for whole Patch + for col in ['spp_col', 'count_col', 'energy_col', 'mass_col']: + exec col + " = cols.get(col, None)" + spp_list = np.unique(self.data_table.table[spp_col]) if spp_col == None: raise TypeError('No species column specified in "criteria" ' + 'parameter') + _,_,_,_,_, combinations = \ + self.parse_criteria(splits) + result = [] for comb in combinations: diff --git a/macroeco/main.py b/macroeco/main.py new file mode 100644 index 0000000..e11ab57 --- /dev/null +++ b/macroeco/main.py @@ -0,0 +1,523 @@ +""" +=========================== +Main (:mod:`macroeco.main`) +=========================== + +This module contains functions that execute macroecological analyses specified +by user-generated `parameters.txt` configuration files. Instructions for +creating parameter files can be found here. + +.. autosummary:: + :toctree: generated/ + + main + +""" + +from __future__ import division +import os +import shutil +import inspect +import configparser + +from pandas import DataFrame +import matplotlib.pyplot as plt +from matplotlib.mlab import rec2csv, rec_append_fields + +from empirical import Patch +from distributions2 import * +from compare import * + + +# Dictionary with keys for allowable metrics and func type +metric_types = { + 'sad': 'dist', + 'sar': 'curve', + 'ear': 'curve', + 'ssad': 'dist', +} + + +def main(param_dir, param_file='parameters.txt'): + """ + Entry point function for analysis based on parameter files. 
+ + Parameters + ---------- + param_dir : str + Path to directory containing user-generated parameter file + + """ + + # Get full path and confirm file is present + param_path = os.path.join(param_dir, param_file) + if not os.path.isfile(param_path): + raise IOError, "Parameter file not found at %s" % param_path + + # Read parameter file into params object + params = configparser.ConfigParser() + params.read(param_path) + + # Do analysis for each run with options dict (params + addl options) + run_names = params.sections() + for run_name in run_names: + options = dict(params[run_name]) + options['param_dir'] = os.path.abspath(param_dir) + options['run_dir'] = os.path.join(param_dir, run_name) + options['metric_type'] = _check_metric(options) + _do_analysis(options) + + +def _check_metric(options): + """ + Checks if metric is in options list and returns string for metric type. + + Parameters + ---------- + options : dict + Option names and values for analysis + + Returns + ------- + str + 'dist' for distribution, 'curve' for curve type, None if no metric is + specified. + """ + if not 'metric' in options: + return None + try: + return metric_types[options['metric']] + except Exception: + raise NotImplementedError, ("No analysis for metric %s is currently " + "possible." % options['metric']) + +def _do_analysis(options): + """ + Do analysis for a single run, as specified by options. + + Parameters + ---------- + options : dict + Option names and values for analysis + + """ + + if 'metric' in options: + emp_results = _analyze_empirical(options) + else: + emp_results = None + + if 'models' in options: + mod_results = _analyze_models(options, emp_results) + else: + mod_results = None + + _save_results(options, emp_results, mod_results) + + +def _analyze_empirical(options): + """ + Perform empirical analysis of metric on data set + + Parameters + ---------- + options : dict + Option names and values for analysis + + Returns + ------- + list of tuples + Each tuple corresponds to a combination (see XXX), with first element + of the tuple giving a dictionary describing the combination and the + second element giving the result of the analysis. Any additional + elements are not used. + + """ + # TODO: (In empirical) Create result objects rather than strange lists of + # nested tuples. + + # If no data path is given or data path invalid, raise error + try: + data_path = os.path.normpath(os.path.join(options['param_dir'], + options['data'])) + except Exception: + raise IOError, "Path to data file is invalid." 
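
The options dict threaded through these helpers originates in main() above: each [section] of the parameters file becomes one run, and its key/value pairs become the options. A minimal, self-contained sketch of that parsing step; the section name and every value are invented, but the option names mirror the ones consumed here (metric, data, models, and the column options):

import configparser

params = configparser.ConfigParser()
params.read_string("""
[example_sad_run]
metric = sad
data = census_data.csv
models = geom, nbinom
spp_col = spp
count_col = count
""")

options = dict(params['example_sad_run'])
# e.g. options['metric'] == 'sad', options['models'] == 'geom, nbinom'
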
+ + # Create Patch object for this data + patch = Patch(data_path) + + # Get cols and splits variable (req by all metrics) and add to options + options['cols'], options['splits'] = _get_cols_splits(options) + + # Get names of args and kwargs to method specified by metric option + exec ("arg_and_kwd_names, _, _, kw_defaults = " + "inspect.getargspec(Patch.%s)" % options['metric']) + arg_names = arg_and_kwd_names[1:-len(kw_defaults)] # Ignore first arg self + kw_names = arg_and_kwd_names[-len(kw_defaults):] + + # Create list with vals for all args and dict with vals for all kwargs + # All required args must be in options + args = [] + for arg_name in arg_names: + try: + exec 'args.append(eval("%s"))' % options[arg_name] + except: + raise ValueError, ("Value for required argument %s not provided" + % arg_name) + + kwargs = {} + for kw_name in kw_names: + try: + exec "kwargs[kw_name]=eval(%s)" % options[kw_name] + except Exception: + pass + + # Call Patch method with appropriate args and return result + return eval("patch.%s(*args, **kwargs)" % options['metric']) + + +def _get_cols_splits(options): + """ + Notes + ----- + Always returns strings, even if dictionary or list is constructed here, to + ensure consistency with provided options. + + """ + + # Splits may be given as option, else is set to empty + if 'splits' in options.keys(): + splits = options['splits'] + else: + splits = {} + + # Cols may be given as option or individual col options may be options + if 'cols' in options.keys(): + cols = options['cols'] + else: + cols = {} + for col in ['spp_col', 'count_col', 'energy_col', 'mass_col']: + cols[col] = options.get(col, None) + + return str(cols), str(splits) + + +def _analyze_models(options, emp_results): + """ + Perform theoretical analysis based on empirical data or options + + Parameters + ---------- + options : dict + Option names and values for analysis + emp_results : list of tuples + Output of method of `empirical.Patch`, or None if no data given + + Returns + ------- + list of tuples + Each tuple corresponds to a combination in emp_result, with one element + in each tuple for the result of each model comparison. The result + object is another tuple of fitted parameters (tuple), values (array), + comparison statistic names (list), and comparison statistic values + (list). + + """ + + if emp_results: + mod_results = _analyze_models_from_data(options, emp_results) + else: + mod_results = _analyze_models_from_options(options) + + return mod_results + + +def _analyze_models_from_data(options, emp_results): + """ + Perform model analysis based on empirical data + + Parameters + ---------- + options : dict + Option names and values for analysis + emp_results : list of tuples + Output of method of `empirical.Patch` + + Returns + ------- + list of dicts + Each dict in the list corresponds to the similarly indexed combination + in emp_result. Dicts have a key for each given model name, with values + that are a four element list of fitted parameters (tuple), values + (array), comparison statistic names (tuple), and comparison statistic + values (tuple). 
+ + """ + + # Get list of model names + models = options['models'].replace(' ', '').split(',') + + # Fit theories to all emp_results + # TODO: Make work for 2D results, i.e., curves, comm_sep, o_ring + # TODO: Make work for curves in general + output_all = [] + for emp_result in emp_results: + output_emp_result = {} + for model in models: + data = emp_result[1] + fits = _get_fits(data, model) + values = _get_values(data, model, fits) + stat_names, stats = _get_comparison_statistic(values, fits) + output_emp_result[model] = [fits, values, stat_names, stats] + output_all.append(output_emp_result) + + return output_all + + +def _analyze_models_from_options(options): + """ + Perform model analysis based on options + + Parameters + ---------- + options : dict + Option names and values for analysis + + Returns + ------- + list of tuples + List of length 1 containing 1 tuple of length 1 (parallel structure to + _analyze_models_with_data). Content of that tuple is fitted parameters + (tuple). + + """ + raise NotImplementedError, "Models cannot be analyzed without data" + + #_get_fits_from_options should call model.translate_args (if exists) + + +def _get_fits(data, model): + return eval("%s.fit2(data)" % model) + + +def _get_values(data, model, fits): + + try: + values = eval("%s.pdf(data, *fits)" % model) + except AttributeError: + values = eval("%s.pmf(data, *fits)" % model) + except: + pass + + return values + +def _get_comparison_statistic(data, fits): + return ['AIC'], [get_AIC(data, fits)] + + +def _save_results(options, emp_results, mod_results): + """ + Save results of analysis as tables and figures + + Parameters + ---------- + options : dict + Option names and values for analysis + emp_results : list + Results of empirical metric analysis from _analyze_empirical + mod_results : list + Results of theoretical metric analysis from _analyze_theoretical + + """ + + # Ensure that output dir for this run exists and is empty + shutil.rmtree(options['run_dir'], ignore_errors=True) + os.makedirs(options['run_dir']) + + # Write outputs depending on pres/abs of emp and mod and dist/curve metric + _write_combination_index_file(options, emp_results) + _write_output(options, emp_results, mod_results) + + +def _write_combination_index_file(options, emp_results): + """ + Write index of combinations table, giving number and combination + """ + + if not emp_results: + return None + + f_path = os.path.join(options['run_dir'], '_combination_index.csv') + with open(f_path, 'a') as f: + for i,emp_result in enumerate(emp_results): + f.write("%i,%s\n" % (i+1, str(emp_result[0]))) + + +def _write_output(options, emp_results, mod_results): + """ + Three groups of output + - Fitted params (always if there is a model) + - Data and pred (always if there is data, although no pred if no models) + - Test statistis (only if both data and model) + """ + + # Get combinations from either emp or mod - if both exist must be same + try: + n_combs = len(emp_results) + except: + n_combs = len(mod_results) + + # Get list of names of models + try: + models = options['models'].replace(' ','').split(",") + except: + models = None + + # Loop through all combinations + for cidx in range(n_combs): + if mod_results: + _write_fitted_params(cidx, models, options, mod_results) + if emp_results: + _write_and_plot_data_pred(cidx, models, options, emp_results, + mod_results) + if mod_results and emp_results: + _write_test_statistics(cidx, models, options, mod_results) + + +def _write_fitted_params(cidx, models, options, mod_results): + + f = 
open(_get_file_path(cidx, options, "fitted_params.csv"), 'w') + f.write("Model, Fit Parameters\n") + + for model in models: + mod_result = mod_results[cidx][model] + mod_fits = str(mod_result[0])[1:-1] # Drop parens around tuple + f.write("%s,%s\n" % (model, mod_fits)) + f.close() + + +def _write_and_plot_data_pred(cidx, models, options, emp_results, mod_results): + """ + For distributions, will write and plot three kinds of comparisons + - pdf/pmf vs histogram + - cdf vs emp cdf + - rad vs rad + + For curves, we'll only do data vs pred (note will have x and y values) + """ + + if options['metric_type'] == 'dist': + _data_pred_dist(cidx, models, options, emp_results, mod_results) + elif options['metric_type'] == 'curve': + _data_pred_curve(cidx, models, options, emp_results, mod_results) + + +def _data_pred_dist(cidx, models, options, emp_results, mod_results): + """ + These tables have column for data and each model. + - pdf/pmf vs histogram + - cdf vs emp cdf + - rad vs rad + Also make plots for all three + """ + + emp_result = emp_results[cidx][1] + n_vals = len(emp_result) + + # CDF + # TODO: This goes up by integers to max value, can be too large + x, emp_cdf = get_empirical_cdf(emp_result) + + def calc_func(model, x, shapes): + return eval("%s.cdf(x, *shapes)" % model) + + plot_exec_str = "ax.step(x, emp, color='k')" + + _save_table_and_plot(cidx, models, options, mod_results, 'data_pred_cdf', + x, emp_cdf, calc_func, plot_exec_str) + + # RAD + x = np.arange(n_vals)/float(n_vals) + 0.5/float(n_vals) + emp_rad = np.sort(emp_result)[::-1] + + def calc_func(model, x, shapes): + return eval("%s.ppf(x, *shapes)" % model)[::-1] + + plot_exec_str = "ax.step(x * x_plot_mult, emp, color='k')" + + _save_table_and_plot(cidx, models, options, mod_results, 'data_pred_rad', + x, emp_rad, calc_func, plot_exec_str, + x_plot_mult=n_vals) + + # PDF/PMF + hist_bins = 11 + emp_hist, edges = np.histogram(emp_result, hist_bins, normed=True) + x = (np.array(edges[:-1]) + np.array(edges[1:])) / 2 + + def calc_func(model, x, shapes): + try: + return eval("%s.pmf(np.floor(x), *shapes)" % model) + except: + return eval("%s.pdf(x, *shapes)" % model) + + plot_exec_str = "ax.bar(x-width/2, emp, width=width, color='gray')" + + _save_table_and_plot(cidx, models, options, mod_results, 'data_pred_pdf', + x, emp_hist, calc_func, plot_exec_str) + + +def _save_table_and_plot(cidx, models, options, mod_results, name, x, emp, + calc_func, plot_exec_str, x_plot_mult=1): + + f_path = _get_file_path(cidx, options, '%s.csv' % name) + p_path = _get_file_path(cidx, options, '%s.png' % name) + + df = DataFrame({'x': x * x_plot_mult}) + df['empirical'] = emp + for model in models: + mod_result = mod_results[cidx][model] + shapes = mod_result[0] + result = calc_func(model, x, shapes) + df[model] = result + + df.to_csv(f_path, index=False, float_format='%.4f') # Table + + df_plt = df.set_index('x') # Figure + emp = df_plt['empirical'] + df_plt = df_plt.drop('empirical',1) + + width = x[1] - x[0] + ax = df_plt.plot() + exec plot_exec_str + fig = ax.get_figure() + fig.savefig(p_path) + + plt.close('all') + + +def _data_pred_curve(cidx, models, options, emp_results, mod_results): + raise NotImplementedError, "Data and curve comparison not implemented" + + +def _write_test_statistics(cidx, models, options, mod_results): + # TODO: Add delta test statistics columns + + f = open(_get_file_path(cidx, options, "test_statistics.csv"), 'w') + + # Gets stat name list from any element of result dict - same for all models + stat_names_list = 
next(mod_results[cidx].itervalues())[2] + stat_names_str = str(stat_names_list)[1:-1].strip("'") + + f.write("Theory, %s\n" % stat_names_str) + + for model in models: + mod_result = mod_results[cidx][model] + mod_stats = str(mod_result[3])[1:-1] + f.write("%s,%s\n" % (model, mod_stats)) + f.close() + + +def _get_file_path(cidx, options, file_name): + return os.path.join(options['run_dir'], + '%i_%s' % (cidx+1, file_name)) + diff --git a/macroeco/tests/test_distributions2.py b/macroeco/test_distributions2.py similarity index 100% rename from macroeco/tests/test_distributions2.py rename to macroeco/test_distributions2.py From c8fd5600f81f5612028f589b5eb0f7091c91c6c2 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 15 Mar 2014 15:24:26 -0700 Subject: [PATCH 035/343] Temporarily comment out shapely until fix build to include --- macroeco/empirical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macroeco/empirical.py b/macroeco/empirical.py index 88b3b2b..ffd87c1 100644 --- a/macroeco/empirical.py +++ b/macroeco/empirical.py @@ -36,7 +36,7 @@ from copy import deepcopy from data import DataTable import scipy.spatial.distance as dist -import shapely.geometry as geo +#import shapely.geometry as geo class Patch: From fb68bb09193d2a711a1ab8dee0ad2a04ae994a65 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 15 Mar 2014 15:24:55 -0700 Subject: [PATCH 036/343] Add basic logging and change arg to param file --- macroeco/main.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/macroeco/main.py b/macroeco/main.py index e11ab57..b330c51 100644 --- a/macroeco/main.py +++ b/macroeco/main.py @@ -24,6 +24,7 @@ import matplotlib.pyplot as plt from matplotlib.mlab import rec2csv, rec_append_fields +from twiggy_setup import get_log from empirical import Patch from distributions2 import * from compare import * @@ -38,7 +39,7 @@ } -def main(param_dir, param_file='parameters.txt'): +def main(param_path='parameters.txt'): """ Entry point function for analysis based on parameter files. 
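
With this change the entry point takes the path to the parameters file itself rather than the directory containing it. A hypothetical invocation under the new interface (the path is invented, and it assumes the main module is importable from the macroeco package):

from macroeco import main

# Each [section] in the file runs as a separate analysis; outputs are written
# to a subdirectory of the parameter file's directory named after the run.
main.main('/path/to/my_project/parameters.txt')
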
@@ -48,11 +49,15 @@ def main(param_dir, param_file='parameters.txt'): Path to directory containing user-generated parameter file """ - - # Get full path and confirm file is present - param_path = os.path.join(param_dir, param_file) + + # Confirm file is present and extract dir name if not os.path.isfile(param_path): raise IOError, "Parameter file not found at %s" % param_path + param_dir = os.path.dirname(param_path) + + # Get logger and announce start + log = get_log(param_dir, clear=True) + log.info('Starting analysis') # Read parameter file into params object params = configparser.ConfigParser() @@ -61,11 +66,13 @@ def main(param_dir, param_file='parameters.txt'): # Do analysis for each run with options dict (params + addl options) run_names = params.sections() for run_name in run_names: + log.info('Starting run %s' % run_name) options = dict(params[run_name]) options['param_dir'] = os.path.abspath(param_dir) options['run_dir'] = os.path.join(param_dir, run_name) options['metric_type'] = _check_metric(options) _do_analysis(options) + log.info('Finished analysis successfully') def _check_metric(options): From b35343c09a64db7e42d8e033b8cbd89dbba2a337 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 15 Mar 2014 15:25:39 -0700 Subject: [PATCH 037/343] Add spec file to build macroeco desktop --- macroeco/desktop_mac.spec | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 macroeco/desktop_mac.spec diff --git a/macroeco/desktop_mac.spec b/macroeco/desktop_mac.spec new file mode 100644 index 0000000..3955cbc --- /dev/null +++ b/macroeco/desktop_mac.spec @@ -0,0 +1,28 @@ +# -*- mode: python -*- +a = Analysis(['desktop.py'], + pathex=['/Users/jkitzes/Projects/macroeco/macroeco'], + hiddenimports=['scipy.special._ufuncs_cxx'], + hookspath=None, + runtime_hooks=None) +pyz = PYZ(a.pure) +exe = EXE(pyz, + a.scripts, + exclude_binaries=True, + name='desktop', + debug=False, + strip=None, + upx=True, + console=False ) +coll = COLLECT(exe, +a.binaries + [('libwx_osx_cocoau-3.0.0.0.0.dylib', + '/Users/jkitzes/anaconda/pkgs/wxpython-3.0-py27_0/lib/libwx_osx_cocoau-3.0.0.0.0.dylib', + 'BINARY')], + a.zipfiles, + a.datas, + strip=None, + upx=True, + name='desktop') +app = BUNDLE(coll, + name='desktop.app', + icon=None) + From 9056616425d84dd6d30bd56e8813db7aa2cb3110 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 15 Mar 2014 15:26:04 -0700 Subject: [PATCH 038/343] Draft of macroeco desktop GUI module --- macroeco/desktop.py | 140 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100755 macroeco/desktop.py diff --git a/macroeco/desktop.py b/macroeco/desktop.py new file mode 100755 index 0000000..7ed1f3f --- /dev/null +++ b/macroeco/desktop.py @@ -0,0 +1,140 @@ +""" +Macroeco Desktop - A graphical interface for macroeco + +Open file dialog +http://wiki.wxpython.org/Getting%20Started + +Redirecting stdout and stderr +http://blog.pythonlibrary.org/2009/01/01/wxpython-redirecting-stdout-stderr/ + +Process and stdout to window (see Example at link below) +http://wxpython.org/Phoenix/docs/html/Process.html#process +""" + +import wx +import os, sys +import threading as thread + +import main +from twiggy_setup import get_log + +class RedirectText(object): + def __init__(self,aWxTextCtrl): + self.out=aWxTextCtrl + + def write(self,string): + wx.CallAfter(self.out.WriteText, string) + +# Class for window +class MainWindow(wx.Frame): + + def __init__(self, parent, title): + wx.Frame.__init__(self, parent, title=title) + self.t = 
None + self.dirname = '.' + self.parampath = 'parameters.txt' + self.InitUI() + self.Show(True) + + + def InitUI(self): + + # Header + sizerhead = wx.BoxSizer(wx.HORIZONTAL) + head_font = wx.Font(18, wx.SWISS, wx.NORMAL, wx.BOLD) + heading = wx.StaticText(self, label='Macroeco Desktop') + sizerhead.Add(heading, 0, wx.EXPAND) + #heading.SetFont(head_font) + + # Step 1 + sizer1 = wx.BoxSizer(wx.VERTICAL) + sizer1a = wx.BoxSizer(wx.HORIZONTAL) + sizer1b = wx.BoxSizer(wx.HORIZONTAL) + + choose_text = wx.StaticText(self, + label='1. Choose a parameters file'+' '*20) + + choose_button = wx.Button(self, label='Open') + self.Bind(wx.EVT_BUTTON, self.OnOpen, choose_button) + + # Make attribute so easily modified by other methods + self.choose_msg = wx.StaticText(self, + label='') + #self.choose_msg.SetFont(wx.Font(11, wx.SWISS, wx.SLANT, wx.NORMAL)) + + sizer1a.Add(choose_text, 1, wx.EXPAND) + sizer1a.Add(choose_button, 0, wx.EXPAND) + sizer1b.Add(self.choose_msg, 1, wx.EXPAND) + + sizer1.Add(sizer1a, 0, wx.EXPAND) + sizer1.Add(sizer1b, 0, wx.EXPAND) + + # Step 2 + sizer2 = wx.BoxSizer(wx.HORIZONTAL) + run_text = wx.StaticText(self, + label='2. Run analysis') + self.run_button = wx.Button(self, label='Run') + sizer2.Add(run_text, 1, wx.EXPAND) + sizer2.Add(self.run_button, 0, wx.EXPAND) + + # Updating process + self.process = None + self.Bind(wx.EVT_BUTTON, self.OnRun, self.run_button) + + # Output window + sizerlogbox = wx.BoxSizer(wx.HORIZONTAL) + self.logbox = wx.TextCtrl(self, wx.ID_ANY, size=(400,400), + style = wx.TE_MULTILINE|wx.TE_READONLY|wx.HSCROLL) + sizerlogbox.Add(self.logbox, 1, wx.EXPAND) + + # redirect text here + redir=RedirectText(self.logbox) + sys.stdout=redir + sys.stderr=redir + + # Restore run button + self.Bind(wx.EVT_IDLE, self.OnIdle) + + # All items + sizer_main = wx.BoxSizer(wx.VERTICAL) + sizer_main.Add(sizerhead, 0, wx.EXPAND | wx.ALL, 12) + sizer_main.Add(sizer1, 0, wx.EXPAND | wx.ALL, 12) + sizer_main.Add(sizer2, 0, wx.EXPAND | wx.ALL, 12) + sizer_main.Add(sizerlogbox, 0, wx.EXPAND | wx.ALL, 12) + + # Set up main layout + self.SetSizer(sizer_main) + self.SetAutoLayout(True) + sizer_main.Fit(self) + + def OnOpen(self,e): + self.filename = '' + self.dirname = '' + dlg = wx.FileDialog(self, 'Choose a parameters file', self.dirname, + '', '*.*', wx.OPEN) + if dlg.ShowModal() == wx.ID_OK: + self.filename = dlg.GetFilename() + self.dirname = dlg.GetDirectory() + self.choose_msg.SetLabel(' Parameters file selected') + self.parampath = os.path.join(self.dirname, self.filename) + dlg.Destroy() + + def OnRun(self,e): + self.logbox.SetValue('') + self.RunMain() + + def RunMain(self): + self.run_button.Enable(False) # Turn the run button off + self.t = thread.Thread(target=main.main, args=(self.parampath,)) + self.t.daemon = True # Kills thread if app exits + self.t.start() + + def OnIdle(self, event): + if self.t: # If a thread has been started + if not self.t.is_alive(): # And it's not alive + self.run_button.Enable(True) # Turn the run button on + +if __name__ == '__main__': + app = wx.App(False) + frame = MainWindow(None, 'Macroeco Desktop') + app.MainLoop() From 7d714fbbe179951469e4344e713e95a6f0ad4924 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 15 Mar 2014 15:26:15 -0700 Subject: [PATCH 039/343] Add logging setup module --- macroeco/twiggy_setup.py | 75 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 macroeco/twiggy_setup.py diff --git a/macroeco/twiggy_setup.py b/macroeco/twiggy_setup.py new file mode 100644 index 
0000000..3418d5a --- /dev/null +++ b/macroeco/twiggy_setup.py @@ -0,0 +1,75 @@ +import twiggy +import traceback +import sys +import os +import threading as thread + +# Output format for log file - remove traceback prefix +file_format = twiggy.formats.LineFormat(traceback_prefix='') + +# Output format for terminal logging - only text message part +class stdLineFormat(twiggy.formats.LineFormat): + def __call__(self, msg): + text = self.format_text(msg) + print "{text}".format(**locals()) + return "" +std_format = stdLineFormat(traceback_prefix='') + +# Logger setup - returns logger object +def get_log(log_dir='/Users/jkitzes/Desktop/', clear=False): + + # Get path to log file - must be writable (ie, not inside pyinstaller app) + log_path = os.path.join(log_dir,'log.txt') + + # Delete log file if requested + if clear: + try: + os.remove(log_path) + except OSError: + pass + + # Set up outputs for file and stdout + file_output = twiggy.outputs.FileOutput(log_path, format=file_format) + std_output = twiggy.outputs.StreamOutput(format=std_format, + stream=sys.stdout) + + # Create emitters + twiggy.addEmitters(('file', twiggy.levels.DEBUG, None, file_output), + ('stdout', twiggy.levels.INFO, None, std_output)) + + # Declare logger for macroeco + # TODO: Once modules are in subdirs, change to __name__ to log module also + log = twiggy.log.name('meco') + + return log + +# Log uncaught exceptions +log = twiggy.log.name('meco') # If below called before log def elsewhere +def log_uncaught(type1, value1, traceback1): + tb_list = traceback.format_exception(type1, value1, traceback1) + tb_str = ''.join(tb_list) + log.options(suppress_newlines=False).critical('\n'+tb_str) +sys.excepthook = log_uncaught + +# Use proper excepthook for threads also +def installThreadExcepthook(): + """ + Workaround for sys.excepthook thread bug + http://bugs.python.org/issue1230540 + """ + init_old = thread.Thread.__init__ + def init(self, *args, **kwargs): + init_old(self, *args, **kwargs) + run_old = self.run + def run_with_except_hook(*args, **kw): + try: + run_old(*args, **kw) + except (KeyboardInterrupt, SystemExit): + raise + except: + sys.excepthook(*sys.exc_info()) + self.run = run_with_except_hook + thread.Thread.__init__ = init +installThreadExcepthook() + + From 26ac56dd8d169b020346feadf254b29cfc73b293 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 15 Mar 2014 15:27:06 -0700 Subject: [PATCH 040/343] Add build and dist folders to gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 3cf34a5..8524faf 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ *generated* +*/build/* +*/dist/* From dfb64235d2fe4d061f375971c735cadf8e6260a5 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 15 Mar 2014 19:32:20 -0700 Subject: [PATCH 041/343] Revert to default theme --- doc/conf.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 37d87c2..39bd59a 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -17,7 +17,7 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath('..')) -sys.path.insert(0, os.path.abspath('_ext/numpydoc')) +#sys.path.insert(0, os.path.abspath('_ext/numpydoc')) # -- General configuration ----------------------------------------------------- @@ -99,21 +99,21 @@ # The theme to use for HTML and HTML Help pages. 
See the documentation for # a list of builtin themes. -html_theme = 'scipy' +#html_theme = 'scipy' # Add any paths that contain custom themes here, relative to this directory. -html_theme_path = ['_theme'] +#html_theme_path = ['_theme'] # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -html_theme_options = { - "edit_link": False, - "rootlinks": [], - "sidebar": "right", - "scipy_org_logo": True, - "navigation_links": True, -} +#html_theme_options = { +# "edit_link": False, +# "rootlinks": [], +# "sidebar": "right", +# "scipy_org_logo": True, +# "navigation_links": True, +#} # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". #html_title = None From 05d6d41c275b4b1b8e9d8a1dc406bc9e1790334a Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 15 Mar 2014 19:37:46 -0700 Subject: [PATCH 042/343] Add pip requirements for RTD --- doc/requirements.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 doc/requirements.txt diff --git a/doc/requirements.txt b/doc/requirements.txt new file mode 100644 index 0000000..5e2a6a6 --- /dev/null +++ b/doc/requirements.txt @@ -0,0 +1,4 @@ +scipy +numpy +matplotlib +numpydoc From 3a1a36c823b01ab8bf34e9268db63c567dfdf932 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 15 Mar 2014 19:43:23 -0700 Subject: [PATCH 043/343] Convert empirical to automodule --- doc/empirical.rst | 13 +----------- macroeco/empirical.py | 46 +++++++++++++++---------------------------- 2 files changed, 17 insertions(+), 42 deletions(-) diff --git a/doc/empirical.rst b/doc/empirical.rst index 44fef13..2c30f04 100644 --- a/doc/empirical.rst +++ b/doc/empirical.rst @@ -1,12 +1 @@ -.. currentmodule:: macroeco.empirical - -Empirical -============ - -Some description. - -.. autosummary:: - :toctree: generated/ - - Patch - z +.. automodule:: macroeco.empirical diff --git a/macroeco/empirical.py b/macroeco/empirical.py index ffd87c1..2f35aca 100644 --- a/macroeco/empirical.py +++ b/macroeco/empirical.py @@ -1,33 +1,19 @@ -#!/usr/bin/python - -''' -Calculating macroecological metrics for empirical or theoretical patch. Patch -is interpreted broadly as any temporally and spatially defined census. - -Classes -------- -- `Patch` -- empirical metrics for census data - -Patch Methods -------------- -- `sad` -- calculate species abundance distribution (grid or sample) -- `sar` -- calculate species-area relationship (grid or sample) -- `universal_sar` -- calculates the universal sar curve -- `ear` -- calculate endemics-area relationship (grid or sample) -- `comm` -- calculate commonality between sub-patches (grid) -- `ssad` -- calculate species-level spatial abundance distrib (grid or sample) -- `sed` -- calculate species energy distribution (grid or sample) -- `ied` -- calculate the community (individual) energy distribution -- `ased` -- calculate the average species energy distribution -- `tsed` -- calculate the total species energy distribution - -- `get_sp_centers` -- -- 'get_div_areas' -- return list of areas made by div_list - -Misc functions --------------- -- `distance` -- return Euclidean distance between two points -''' +""" +============================================== +Empirical (:mod:`macroeco.empirical`) +============================================== + +This module contains distributions commonly used in analysis of ecological +patterns. At present, all distributions here are univariate. + +.. 
autosummary:: + :toctree: generated/ + + Patch + z + + +""" from __future__ import division import numpy as np From 3a15391d009ec317f3006f120b3aab6f0be7cd5a Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 15 Mar 2014 19:43:44 -0700 Subject: [PATCH 044/343] Remove local copy of numpydoc From b81dce93796429c933c0dec4264cf69670c4ec48 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 15 Mar 2014 20:00:57 -0700 Subject: [PATCH 045/343] Add macroeco to requirements doc --- doc/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/requirements.txt b/doc/requirements.txt index 5e2a6a6..96d6eea 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -2,3 +2,4 @@ scipy numpy matplotlib numpydoc +macroeco From 5933ef3cf39208c9b2826352c6a37ff4a149125e Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 15 Mar 2014 20:02:42 -0700 Subject: [PATCH 046/343] Strip all scipy --- macroeco/distributions2.py | 470 +------------------------------------ macroeco/empirical.py | 2 +- 2 files changed, 2 insertions(+), 470 deletions(-) diff --git a/macroeco/distributions2.py b/macroeco/distributions2.py index 73aba43..33caf8a 100644 --- a/macroeco/distributions2.py +++ b/macroeco/distributions2.py @@ -42,48 +42,7 @@ import numpy as np import numpy.random as nprand -from scipy.misc.doccer import inherit_docstring_from -from scipy.stats.distributions import (rv_discrete, rv_continuous, docdict, - docdict_discrete, docheaders) -import scipy.stats.distributions as spdist -import scipy.optimize as optim -import scipy.special as special - -_doc_param_note = \ -"""There are many available methods of `%(name)s`, each of which require one or -more of the parameters listed below. -""" - -_doc_custom_methods = \ -"""fit2(data, %(shapes)s) - MLE estimates of shapes given initial guesses (use instead of `fit`).""" - -_doc_discrete_custom_methods = \ -"""translate_args(uargs) - Get shape parameters from user-friendly args. -fit2(data, %(shapes)s) - MLE estimates of shapes given initial guesses.""" - -# Remove header from all methods -_docdict_allmeth_sh = docdict['allmethods'][16:] -_docdict_discrete_allmeth_sh = docdict_discrete['allmethods'][17:] - -# **kwds in expect string followed by no space was throwing warning -_docdict_allmeth_sh = _docdict_allmeth_sh.replace(', **kwds','') - -docdict['before_notes'] = ''.join([_doc_param_note, - docheaders['methods'], - _doc_custom_methods, - _docdict_allmeth_sh, - docdict['callparams']]) - -docdict_discrete['before_notes'] = ''.join([_doc_param_note, - docheaders['methods'], - _doc_discrete_custom_methods, - _docdict_discrete_allmeth_sh, - docdict_discrete['callparams']]) - - +from scipy.stats.distributions import rv_discrete, rv_continuous class rv_continuous_meco(rv_continuous): """ @@ -153,430 +112,3 @@ def fit2(self, *args): return self.fit(*args, floc=0, fscale=1)[:-2] -class rv_discrete_meco(rv_discrete): - """ - A modified generic discrete random variable class meant for subclassing. - - This class inherits from the `rv_discrete` class of `scipy.stats` and - contains all of its functionality. See the docstring of `rv_discrete` for - information on usage and subclassing. In addition, this class adds two new - methods. 
- - Methods - ------- - translate_args - takes user-friendly params as input and returns shape params - fit - estimates distribution params from data - - """ - - def translate_args(self, *args): - """ - Translates user-friendly arguments into shape parameters - - See distribution docstring for description of user arguments and shape - parameters. - - Parameters - ---------- - uargs : floats - User argument(s), usually easily measured and specified - - Returns - ------- - tuple of floats - Shape parameter(s) of distribution - - Notes - ----- - """ - - raise NotImplementedError, ("translate_args method not implemented " - "for this distribution") - - - def fit2(self, *args): - """ - Return MLEs for shape parameters from data. - - Parameters - ---------- - data : array_like - Data to use in calculating the MLEs. - args : floats - Subset of shape parameters that are not fit. See Notes. - - Returns - ------- - tuple of floats - MLEs for shape parameters - - Notes - ----- - """ - - raise NotImplementedError, ("fit method not implemented for this " - "distribution") - - -# -# Discrete -# - -class geom_gen(rv_discrete_meco): - r""" - A geometric discrete random variable. - - This implementation of the geometric distribution differs from that in - `scipy.stats`, as the distribution here has support from 0 to inf. - - .. math:: - \mathrm{pmf(x)} = (1-p)^{x} p - - for ``x >= 0``. The location parameter ``loc`` is not used. - - %(before_notes)s - uargs : float - distribution mean - - """ - - @inherit_docstring_from(rv_discrete_meco) - def translate_args(self, mu): - return 1 / (np.array(mu) + 1) - - @inherit_docstring_from(rv_discrete_meco) - def fit2(self, data): - """%(super)s - Requires one argument containing data to fit. - """ - return self.translate_args(np.mean(data)), - - def _argcheck(self, p): - return (p <= 1) & (p >= 0) - - def _pmf(self, x, p): - return (1-p)**x * p - - def _logpmf(self, x, p): - return k*np.log(1-p) + log(p) - - def _cdf(self, x, p): - x = np.floor(x) - return (1.0-(1.0-p)**(x+1)) - - def _stats(self, p): - mu = (1.0 - p) / p - var = (1.0 - p) / p**2 - return mu, var, None, None - -geom = geom_gen(name='geom', shapes='p') - - -class geom_uptrunc_gen(rv_discrete_meco): - r""" - An upper-truncated geometric discrete random variable. - - .. math:: - - \mathrm{pmf(x)} = \frac{(1-p)^{x} p}{1 - (1-p)^{b+1}} - - for ``x >= 0``. - - `geom_uptrunc` takes two shape parameters: ``p`` and ``b``, the upper - limit. The location parameter ``loc`` is not used. - - %(before_notes)s - - uargs : float - distribution mean, upper limit - - Notes - ----- - The boundary ``p = 1`` is a special case in which the ratio between - successive terms of the distribution is 1 (i.e., the pmf is uniform). This - arises when the mean of the distribution is precisely one-half the upper - limit. - - This distribution is known as the Pi distribution in the MaxEnt Theory of - Ecology [#]_, where the ``p`` parameter is known as ``exp(-lambda)``. - - References - ---------- - .. [#] - Harte, J. (2011). Maximum Entropy and Ecology: A Theory of - Abundance, Distribution, and Energetics (p. 264). Oxford, United - Kingdom: Oxford University Press. - - .. - DEV: There is a difficult implicit equation needed to determine the p - parameter from the mu and b arguments. We've employed the brentq solver - here but note that it fails regularly for certain shape combinations. - - """ - - # TODO: Should add a warning for b < 5 or 10 or so (p solver gives erratic - # answers. 
- - @inherit_docstring_from(rv_discrete_meco) - def translate_args(self, mu, b): - return _geom_solve_p_from_mu_vect(mu, b), b - - @inherit_docstring_from(rv_discrete_meco) - def fit2(self, data, b=None): - """%(super)s - Requires two arguments consisting of data to fit and ``b``, the upper - limit of the distribution (held constant). - """ - # Take mean of data as MLE of distribution mean, then calculate p - mu = np.mean(data) - return _geom_solve_p_from_mu_vect(mu, b), b - - def _argcheck(self, p, b): - # Unlike the traditional geometric, p can be > 0 - return (p >= 0) - - def _pmf(self, x, p, b): - pmf = (1.0-p)**x * p / (1.0-(1.0-p)**(b+1)) - pmf[x > b] = 0 - return pmf - - def _cdf(self, x, p, b): - x = np.floor(x) - cdf = (1.0-(1.0-p)**(x+1)) / (1.0-(1.0-p)**(b+1)) - cdf[x > b] = 1 - return cdf - - def _stats(self, p, b): - mu = (p / (1 - p)) - ((b + 1) / (p**-b - 1)) - return mu, None, None, None - -geom_uptrunc = geom_uptrunc_gen(name='geom_uptrunc', shapes='p, b') - -def _geom_solve_p_from_mu(mu, b): - """ - For the geom_uptrunc, given mu and b, return p. - Ref: Harte 2011, Oxford U Press. Eq. 7.50. - """ - - def p_eq(p, mu, b): - p, mu, b = Decimal(p), Decimal(mu), Decimal(b) - return ( (p / (1 - p)) - ((b + 1) / (p**-b - 1)) - mu ) - - return optim.brentq(p_eq, 1e-9, 20, args=(mu, b), disp=True) - -_geom_solve_p_from_mu_vect = np.vectorize(_geom_solve_p_from_mu) - - -class nbinom_gen(spdist.nbinom_gen): - r""" - A negative binomial discrete random variable. - - This implementation of the geometric distribution differs from that in - `scipy.stats`, as the distribution here uses the more common ecological - parameterization. - - .. math:: - - \mathrm{pmf(x)} = - \frac{\Gamma (k + x)}{\Gamma(k) x!} \left(\frac{k}{k+\mu}\right)^k - \left(\frac{\mu}{k+\mu}\right)^x - - for ``x >= 0``. In the traditional parameterization, ``n = k`` (the size - parameter) and ``p = k / (k + mu)``. The location parameter ``loc`` is not - used. - - %(before_notes)s - uargs : float - distribution mean and k parameter - - """ - - @inherit_docstring_from(rv_discrete_meco) - def translate_args(self, mu, k): - return mu, k - - @inherit_docstring_from(rv_discrete_meco) - def fit2(self, x, k_range=(0.1,100,0.1)): - """%(super)s - Requires one argument containing data to fit. A keyword argument - k_range contains a tuple of the start, stop, and step values to search - for k. Default is ``k_range=(0.1,100,0.1)``. - - This method recognizes that the MLE of the mu parameter is simply equal - to the mean of the data. A brute force search is then used to find the - parameter k. 
- - """ - assert len(x) > 20, "nbinom fit is not stable with <20 data points" - mu = np.mean(x) - return mu, _nbinom_solve_k_from_mu(x, mu, k_range) - - def _get_p_from_mu(self, mu, k): - return k / (k + mu) - - def _rvs(self, mu, k): - p = self._get_p_from_mu(mu, k) - return nprand.negative_binomial(k, p, self._size) - - def _argcheck(self, mu, k): - p = self._get_p_from_mu(mu, k) - return (k >= 0) & (p >= 0) & (p <= 1) - - def _pmf(self, x, mu, k): - p = self._get_p_from_mu(mu, k) - return np.exp(self._logpmf(x, mu, k)) - - def _logpmf(self, x, mu, k): - p = self._get_p_from_mu(mu, k) - coeff = special.gammaln(k+x)-special.gammaln(x+1)-special.gammaln(k) - return coeff + k*np.log(p) + x*np.log(1-p) - - def _cdf(self, x, mu, k): - p = self._get_p_from_mu(mu, k) - x = np.floor(x) - return special.betainc(k, x+1, p) - - def _ppf(self, q, mu, k): - p = self._get_p_from_mu(mu, k) - vals = np.ceil(special.nbdtrik(q, k, p)) - vals1 = (vals-1).clip(0.0, np.inf) - temp = self._cdf(vals1, k, p) - return np.where(temp >= q, vals1, vals) - - def _stats(self, mu, k): - p = self._get_p_from_mu(mu, k) - Q = 1.0 / p - P = Q - 1.0 - mu = k*P - var = k*P*Q - g1 = (Q+P)/np.sqrt(k*P*Q) - g2 = (1.0 + 6*P*Q) / (k*P*Q) - return mu, var, g1, g2 - -nbinom = nbinom_gen(name='nbinom', shapes='mu, k') - -def _nbinom_solve_k_from_mu(x, mu, k_range): - """ - For the nbinom, given mu, return k from searching some k_range. - """ - - # TODO: See if a root finder like fminbound would work with Decimal used in - # logpmf method (will this work with arrays?) - - def nll(x, mu, k): - return -np.sum(nbinom._logpmf(x, mu, k)) - - k_array = np.arange(*k_range) - nll_array = np.zeros(len(k_array)) - - for i in range(len(k_array)): - nll_array[i] = nll(x, mu, k_array[i]) - - min_nll_idx = np.argmin(nll_array) - - return k_array[min_nll_idx] - -# -# Continuous -# - -class expon_gen(rv_continuous_meco): - r""" - An exponential continuous random variable. - - .. math:: - - \mathrm{pdf(x)} = \lambda e^{-\lambda x} - - for ``x >= 0``. - - %(before_notes)s - uargs : float - distribution mean - - """ - - @inherit_docstring_from(rv_continuous_meco) - def translate_args(self, mu): - return 1 / mu - - @inherit_docstring_from(rv_continuous_meco) - def fit2(self, data): - expon = expon_gen(a=0.0) - return 1/expon.fit(data, floc=0)[2], - - def _rvs(self, lam): - return nprand.exponential(1/lam, self._size) - - def _pdf(self, x, lam): - return lam * np.exp(-lam*x) - - def _cdf(self, x, lam): - return 1 - np.exp(-lam*x) - - def _entropy(self, lam): - return 1 - np.ln(lam) - - def _stats(self, lam): - return lam**-1, lam**-2, 2, 6 - -expon = expon_gen(a=0.0, name='expon', shapes='lam') - - -class expon_uptrunc_gen(rv_continuous_meco): - r""" - An upper-truncated exponential continuous random variable. - - .. math:: - - \mathrm{pdf(x)} = \frac{\lambda e^{-\lambda x}}{1 - e^{-\lambda x}} - - for ``b >= x >= 0``. - - %(before_notes)s - uargs : float - distribution mean and upper limit - - """ - - # Internally, class works by creating a new expon_gen object with the - # appropriate upper limit and calling its methods. - - # TODO: Do all of these broadcast correctly, or should we call _pdf, etc.? 
- - @inherit_docstring_from(rv_continuous_meco) - def translate_args(self, mu, b): - raise NotImplementedError, "Translation of mu to lam not implemented" - - @inherit_docstring_from(rv_continuous_meco) - def fit2(self, data, b=np.inf): - expon = expon_gen(a=0.0, b=b) - return expon.fit(data, floc=0)[2], b - - def _rvs(self, lam, b): - expon = expon_gen(a=0.0, b=b) - return expon.rvs(lam) - - def _pdf(self, x, lam, b): - expon = expon_gen(a=0.0, b=b) - return expon.pdf(x, lam) - - def _cdf(self, x, lam, b): - expon = expon_gen(a=0.0, b=b) - return expon.cdf(x, lam) - - def _entropy(self, lam, b): - expon = expon_gen(a=0.0, b=b) - return expon.entropy(lam) - - def _stats(self, lam, b): - expon = expon_gen(a=0.0, b=b) - return expon.stats(lam) - -expon_uptrunc = expon_uptrunc_gen(a=0.0, name='expon_uptrunc', shapes='lam, b') - - - - diff --git a/macroeco/empirical.py b/macroeco/empirical.py index 2f35aca..cf80f43 100644 --- a/macroeco/empirical.py +++ b/macroeco/empirical.py @@ -21,7 +21,7 @@ import itertools from copy import deepcopy from data import DataTable -import scipy.spatial.distance as dist +#import scipy.spatial.distance as dist #import shapely.geometry as geo From f8797ed406c780d1dee2e28945c66add30d09aff Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 15 Mar 2014 20:04:57 -0700 Subject: [PATCH 047/343] Bring back most scipy --- macroeco/distributions2.py | 470 ++++++++++++++++++++++++++++++++++++- 1 file changed, 469 insertions(+), 1 deletion(-) diff --git a/macroeco/distributions2.py b/macroeco/distributions2.py index 33caf8a..8a81285 100644 --- a/macroeco/distributions2.py +++ b/macroeco/distributions2.py @@ -42,7 +42,48 @@ import numpy as np import numpy.random as nprand -from scipy.stats.distributions import rv_discrete, rv_continuous +#from scipy.misc.doccer import inherit_docstring_from +from scipy.stats.distributions import (rv_discrete, rv_continuous, docdict, + docdict_discrete, docheaders) +import scipy.stats.distributions as spdist +import scipy.optimize as optim +import scipy.special as special + +_doc_param_note = \ +"""There are many available methods of `%(name)s`, each of which require one or +more of the parameters listed below. +""" + +_doc_custom_methods = \ +"""fit2(data, %(shapes)s) + MLE estimates of shapes given initial guesses (use instead of `fit`).""" + +_doc_discrete_custom_methods = \ +"""translate_args(uargs) + Get shape parameters from user-friendly args. +fit2(data, %(shapes)s) + MLE estimates of shapes given initial guesses.""" + +# Remove header from all methods +_docdict_allmeth_sh = docdict['allmethods'][16:] +_docdict_discrete_allmeth_sh = docdict_discrete['allmethods'][17:] + +# **kwds in expect string followed by no space was throwing warning +_docdict_allmeth_sh = _docdict_allmeth_sh.replace(', **kwds','') + +docdict['before_notes'] = ''.join([_doc_param_note, + docheaders['methods'], + _doc_custom_methods, + _docdict_allmeth_sh, + docdict['callparams']]) + +docdict_discrete['before_notes'] = ''.join([_doc_param_note, + docheaders['methods'], + _doc_discrete_custom_methods, + _docdict_discrete_allmeth_sh, + docdict_discrete['callparams']]) + + class rv_continuous_meco(rv_continuous): """ @@ -112,3 +153,430 @@ def fit2(self, *args): return self.fit(*args, floc=0, fscale=1)[:-2] +class rv_discrete_meco(rv_discrete): + """ + A modified generic discrete random variable class meant for subclassing. + + This class inherits from the `rv_discrete` class of `scipy.stats` and + contains all of its functionality. 
See the docstring of `rv_discrete` for + information on usage and subclassing. In addition, this class adds two new + methods. + + Methods + ------- + translate_args + takes user-friendly params as input and returns shape params + fit + estimates distribution params from data + + """ + + def translate_args(self, *args): + """ + Translates user-friendly arguments into shape parameters + + See distribution docstring for description of user arguments and shape + parameters. + + Parameters + ---------- + uargs : floats + User argument(s), usually easily measured and specified + + Returns + ------- + tuple of floats + Shape parameter(s) of distribution + + Notes + ----- + """ + + raise NotImplementedError, ("translate_args method not implemented " + "for this distribution") + + + def fit2(self, *args): + """ + Return MLEs for shape parameters from data. + + Parameters + ---------- + data : array_like + Data to use in calculating the MLEs. + args : floats + Subset of shape parameters that are not fit. See Notes. + + Returns + ------- + tuple of floats + MLEs for shape parameters + + Notes + ----- + """ + + raise NotImplementedError, ("fit method not implemented for this " + "distribution") + + +# +# Discrete +# + +class geom_gen(rv_discrete_meco): + r""" + A geometric discrete random variable. + + This implementation of the geometric distribution differs from that in + `scipy.stats`, as the distribution here has support from 0 to inf. + + .. math:: + \mathrm{pmf(x)} = (1-p)^{x} p + + for ``x >= 0``. The location parameter ``loc`` is not used. + + %(before_notes)s + uargs : float + distribution mean + + """ + + @inherit_docstring_from(rv_discrete_meco) + def translate_args(self, mu): + return 1 / (np.array(mu) + 1) + + @inherit_docstring_from(rv_discrete_meco) + def fit2(self, data): + """%(super)s + Requires one argument containing data to fit. + """ + return self.translate_args(np.mean(data)), + + def _argcheck(self, p): + return (p <= 1) & (p >= 0) + + def _pmf(self, x, p): + return (1-p)**x * p + + def _logpmf(self, x, p): + return k*np.log(1-p) + log(p) + + def _cdf(self, x, p): + x = np.floor(x) + return (1.0-(1.0-p)**(x+1)) + + def _stats(self, p): + mu = (1.0 - p) / p + var = (1.0 - p) / p**2 + return mu, var, None, None + +geom = geom_gen(name='geom', shapes='p') + + +class geom_uptrunc_gen(rv_discrete_meco): + r""" + An upper-truncated geometric discrete random variable. + + .. math:: + + \mathrm{pmf(x)} = \frac{(1-p)^{x} p}{1 - (1-p)^{b+1}} + + for ``x >= 0``. + + `geom_uptrunc` takes two shape parameters: ``p`` and ``b``, the upper + limit. The location parameter ``loc`` is not used. + + %(before_notes)s + + uargs : float + distribution mean, upper limit + + Notes + ----- + The boundary ``p = 1`` is a special case in which the ratio between + successive terms of the distribution is 1 (i.e., the pmf is uniform). This + arises when the mean of the distribution is precisely one-half the upper + limit. + + This distribution is known as the Pi distribution in the MaxEnt Theory of + Ecology [#]_, where the ``p`` parameter is known as ``exp(-lambda)``. + + References + ---------- + .. [#] + Harte, J. (2011). Maximum Entropy and Ecology: A Theory of + Abundance, Distribution, and Energetics (p. 264). Oxford, United + Kingdom: Oxford University Press. + + .. + DEV: There is a difficult implicit equation needed to determine the p + parameter from the mu and b arguments. We've employed the brentq solver + here but note that it fails regularly for certain shape combinations. 
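As a rough sketch of that implicit relation (illustrative only, not part of
the patch; the bracket below simply mirrors the one used by
_geom_solve_p_from_mu later in this module and is an assumption):

    import scipy.optimize as optim

    def _p_from_mu_sketch(mu, b):
        # Mean of the upper-truncated geometric (Harte 2011, Eq. 7.50),
        # rearranged so that its root in p is the desired shape parameter
        mean_eq = lambda p: p / (1 - p) - (b + 1) / (p**-b - 1) - mu
        # This bracket can fail for some mu/b combinations, as noted above
        return optim.brentq(mean_eq, 1e-9, 20)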
+ + """ + + # TODO: Should add a warning for b < 5 or 10 or so (p solver gives erratic + # answers. + + @inherit_docstring_from(rv_discrete_meco) + def translate_args(self, mu, b): + return _geom_solve_p_from_mu_vect(mu, b), b + + @inherit_docstring_from(rv_discrete_meco) + def fit2(self, data, b=None): + """%(super)s + Requires two arguments consisting of data to fit and ``b``, the upper + limit of the distribution (held constant). + """ + # Take mean of data as MLE of distribution mean, then calculate p + mu = np.mean(data) + return _geom_solve_p_from_mu_vect(mu, b), b + + def _argcheck(self, p, b): + # Unlike the traditional geometric, p can be > 0 + return (p >= 0) + + def _pmf(self, x, p, b): + pmf = (1.0-p)**x * p / (1.0-(1.0-p)**(b+1)) + pmf[x > b] = 0 + return pmf + + def _cdf(self, x, p, b): + x = np.floor(x) + cdf = (1.0-(1.0-p)**(x+1)) / (1.0-(1.0-p)**(b+1)) + cdf[x > b] = 1 + return cdf + + def _stats(self, p, b): + mu = (p / (1 - p)) - ((b + 1) / (p**-b - 1)) + return mu, None, None, None + +geom_uptrunc = geom_uptrunc_gen(name='geom_uptrunc', shapes='p, b') + +def _geom_solve_p_from_mu(mu, b): + """ + For the geom_uptrunc, given mu and b, return p. + Ref: Harte 2011, Oxford U Press. Eq. 7.50. + """ + + def p_eq(p, mu, b): + p, mu, b = Decimal(p), Decimal(mu), Decimal(b) + return ( (p / (1 - p)) - ((b + 1) / (p**-b - 1)) - mu ) + + return optim.brentq(p_eq, 1e-9, 20, args=(mu, b), disp=True) + +_geom_solve_p_from_mu_vect = np.vectorize(_geom_solve_p_from_mu) + + +class nbinom_gen(spdist.nbinom_gen): + r""" + A negative binomial discrete random variable. + + This implementation of the geometric distribution differs from that in + `scipy.stats`, as the distribution here uses the more common ecological + parameterization. + + .. math:: + + \mathrm{pmf(x)} = + \frac{\Gamma (k + x)}{\Gamma(k) x!} \left(\frac{k}{k+\mu}\right)^k + \left(\frac{\mu}{k+\mu}\right)^x + + for ``x >= 0``. In the traditional parameterization, ``n = k`` (the size + parameter) and ``p = k / (k + mu)``. The location parameter ``loc`` is not + used. + + %(before_notes)s + uargs : float + distribution mean and k parameter + + """ + + @inherit_docstring_from(rv_discrete_meco) + def translate_args(self, mu, k): + return mu, k + + @inherit_docstring_from(rv_discrete_meco) + def fit2(self, x, k_range=(0.1,100,0.1)): + """%(super)s + Requires one argument containing data to fit. A keyword argument + k_range contains a tuple of the start, stop, and step values to search + for k. Default is ``k_range=(0.1,100,0.1)``. + + This method recognizes that the MLE of the mu parameter is simply equal + to the mean of the data. A brute force search is then used to find the + parameter k. 
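A minimal usage sketch (hypothetical numbers; assumes the ``macroeco``
package is importable, since ``fit2`` is defined on the module-level
``nbinom`` instance below):

    import numpy.random as nprand
    from macroeco.distributions2 import nbinom

    data = nprand.negative_binomial(1, 1 / (1 + 10), size=200)  # k = 1, mu = 10
    mu_hat, k_hat = nbinom.fit2(data)  # mu_hat is the sample mean, k_hat from the grid search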
+ + """ + assert len(x) > 20, "nbinom fit is not stable with <20 data points" + mu = np.mean(x) + return mu, _nbinom_solve_k_from_mu(x, mu, k_range) + + def _get_p_from_mu(self, mu, k): + return k / (k + mu) + + def _rvs(self, mu, k): + p = self._get_p_from_mu(mu, k) + return nprand.negative_binomial(k, p, self._size) + + def _argcheck(self, mu, k): + p = self._get_p_from_mu(mu, k) + return (k >= 0) & (p >= 0) & (p <= 1) + + def _pmf(self, x, mu, k): + p = self._get_p_from_mu(mu, k) + return np.exp(self._logpmf(x, mu, k)) + + def _logpmf(self, x, mu, k): + p = self._get_p_from_mu(mu, k) + coeff = special.gammaln(k+x)-special.gammaln(x+1)-special.gammaln(k) + return coeff + k*np.log(p) + x*np.log(1-p) + + def _cdf(self, x, mu, k): + p = self._get_p_from_mu(mu, k) + x = np.floor(x) + return special.betainc(k, x+1, p) + + def _ppf(self, q, mu, k): + p = self._get_p_from_mu(mu, k) + vals = np.ceil(special.nbdtrik(q, k, p)) + vals1 = (vals-1).clip(0.0, np.inf) + temp = self._cdf(vals1, k, p) + return np.where(temp >= q, vals1, vals) + + def _stats(self, mu, k): + p = self._get_p_from_mu(mu, k) + Q = 1.0 / p + P = Q - 1.0 + mu = k*P + var = k*P*Q + g1 = (Q+P)/np.sqrt(k*P*Q) + g2 = (1.0 + 6*P*Q) / (k*P*Q) + return mu, var, g1, g2 + +nbinom = nbinom_gen(name='nbinom', shapes='mu, k') + +def _nbinom_solve_k_from_mu(x, mu, k_range): + """ + For the nbinom, given mu, return k from searching some k_range. + """ + + # TODO: See if a root finder like fminbound would work with Decimal used in + # logpmf method (will this work with arrays?) + + def nll(x, mu, k): + return -np.sum(nbinom._logpmf(x, mu, k)) + + k_array = np.arange(*k_range) + nll_array = np.zeros(len(k_array)) + + for i in range(len(k_array)): + nll_array[i] = nll(x, mu, k_array[i]) + + min_nll_idx = np.argmin(nll_array) + + return k_array[min_nll_idx] + +# +# Continuous +# + +class expon_gen(rv_continuous_meco): + r""" + An exponential continuous random variable. + + .. math:: + + \mathrm{pdf(x)} = \lambda e^{-\lambda x} + + for ``x >= 0``. + + %(before_notes)s + uargs : float + distribution mean + + """ + + @inherit_docstring_from(rv_continuous_meco) + def translate_args(self, mu): + return 1 / mu + + @inherit_docstring_from(rv_continuous_meco) + def fit2(self, data): + expon = expon_gen(a=0.0) + return 1/expon.fit(data, floc=0)[2], + + def _rvs(self, lam): + return nprand.exponential(1/lam, self._size) + + def _pdf(self, x, lam): + return lam * np.exp(-lam*x) + + def _cdf(self, x, lam): + return 1 - np.exp(-lam*x) + + def _entropy(self, lam): + return 1 - np.ln(lam) + + def _stats(self, lam): + return lam**-1, lam**-2, 2, 6 + +expon = expon_gen(a=0.0, name='expon', shapes='lam') + + +class expon_uptrunc_gen(rv_continuous_meco): + r""" + An upper-truncated exponential continuous random variable. + + .. math:: + + \mathrm{pdf(x)} = \frac{\lambda e^{-\lambda x}}{1 - e^{-\lambda x}} + + for ``b >= x >= 0``. + + %(before_notes)s + uargs : float + distribution mean and upper limit + + """ + + # Internally, class works by creating a new expon_gen object with the + # appropriate upper limit and calling its methods. + + # TODO: Do all of these broadcast correctly, or should we call _pdf, etc.? 
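For reference, a standalone sketch of the truncated density itself
(illustrative only, not part of the class; it just makes the renormalization
over [0, b] explicit):

    import numpy as np

    def _expon_uptrunc_pdf_sketch(x, lam, b):
        # Exponential density renormalized by its total mass on [0, b]
        x = np.asarray(x, dtype=float)
        pdf = lam * np.exp(-lam * x) / (1 - np.exp(-lam * b))
        return np.where((x >= 0) & (x <= b), pdf, 0.0)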
+ + @inherit_docstring_from(rv_continuous_meco) + def translate_args(self, mu, b): + raise NotImplementedError, "Translation of mu to lam not implemented" + + @inherit_docstring_from(rv_continuous_meco) + def fit2(self, data, b=np.inf): + expon = expon_gen(a=0.0, b=b) + return expon.fit(data, floc=0)[2], b + + def _rvs(self, lam, b): + expon = expon_gen(a=0.0, b=b) + return expon.rvs(lam) + + def _pdf(self, x, lam, b): + expon = expon_gen(a=0.0, b=b) + return expon.pdf(x, lam) + + def _cdf(self, x, lam, b): + expon = expon_gen(a=0.0, b=b) + return expon.cdf(x, lam) + + def _entropy(self, lam, b): + expon = expon_gen(a=0.0, b=b) + return expon.entropy(lam) + + def _stats(self, lam, b): + expon = expon_gen(a=0.0, b=b) + return expon.stats(lam) + +expon_uptrunc = expon_uptrunc_gen(a=0.0, name='expon_uptrunc', shapes='lam, b') + + + + From 66e9724993b2ba33b953ca7d070fefa03d3b02fb Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 15 Mar 2014 20:06:57 -0700 Subject: [PATCH 048/343] Add inheritdocstringfrom --- macroeco/distributions2.py | 57 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/macroeco/distributions2.py b/macroeco/distributions2.py index 8a81285..c7670d2 100644 --- a/macroeco/distributions2.py +++ b/macroeco/distributions2.py @@ -49,6 +49,63 @@ import scipy.optimize as optim import scipy.special as special +def inherit_docstring_from(cls): + """ + This decorator modifies the decorated function's docstring by + replacing occurrences of '%(super)s' with the docstring of the + method of the same name from the class `cls`. + + If the decorated method has no docstring, it is simply given the + docstring of `cls`s method. + + Parameters + ---------- + cls : Python class or instance + A class with a method with the same name as the decorated method. + The docstring of the method in this class replaces '%(super)s' in the + docstring of the decorated method. + + Returns + ------- + f : function + The decorator function that modifies the __doc__ attribute + of its argument. + + Examples + -------- + In the following, the docstring for Bar.func created using the + docstring of `Foo.func`. + + >>> class Foo(object): + ... def func(self): + ... '''Do something useful.''' + ... return + ... + >>> class Bar(Foo): + ... @inherit_docstring_from(Foo) + ... def func(self): + ... '''%(super)s + ... Do it fast. + ... ''' + ... return + ... + >>> b = Bar() + >>> b.func.__doc__ + 'Do something useful.\n Do it fast.\n ' + + """ + def _doc(func): + cls_docstring = getattr(cls, func.__name__).__doc__ + func_docstring = func.__doc__ + if func_docstring is None: + func.__doc__ = cls_docstring + else: + new_docstring = func_docstring % dict(super=cls_docstring) + func.__doc__ = new_docstring + return func + return _doc + + _doc_param_note = \ """There are many available methods of `%(name)s`, each of which require one or more of the parameters listed below. From 20a6891f263a16089ddd7b43c8d3b06ceadc94b4 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 15 Mar 2014 20:12:22 -0700 Subject: [PATCH 049/343] Add Patch methods to empirical --- macroeco/empirical.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/macroeco/empirical.py b/macroeco/empirical.py index cf80f43..01d995b 100644 --- a/macroeco/empirical.py +++ b/macroeco/empirical.py @@ -9,10 +9,11 @@ .. 
autosummary:: :toctree: generated/ - Patch + Patch.__init__ + Patch.ased + Patch.sad z - """ from __future__ import division From 6ff89a071cb28cae8d6980cde1e1ca00817e114c Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 15 Mar 2014 20:19:42 -0700 Subject: [PATCH 050/343] Bring back scipy to empirical and add shapely requirement --- doc/requirements.txt | 1 + macroeco/empirical.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/requirements.txt b/doc/requirements.txt index 96d6eea..40de587 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -2,4 +2,5 @@ scipy numpy matplotlib numpydoc +shapely macroeco diff --git a/macroeco/empirical.py b/macroeco/empirical.py index 01d995b..bc76569 100644 --- a/macroeco/empirical.py +++ b/macroeco/empirical.py @@ -22,7 +22,7 @@ import itertools from copy import deepcopy from data import DataTable -#import scipy.spatial.distance as dist +import scipy.spatial.distance as dist #import shapely.geometry as geo From 5d1ba69f0919a67c63dd1acbc55ec39196886b7e Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 15 Mar 2014 20:26:17 -0700 Subject: [PATCH 051/343] Ignore _build dir (docs) also --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 8524faf..a4929d5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *generated* */build/* +*/_build/* */dist/* From cc0700435bd16375ae99d0e014886da3b7eb5d86 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 15 Mar 2014 20:30:43 -0700 Subject: [PATCH 052/343] Remove scipy theme From 185970acdcf2e8f626a998dfda1640865d4b7eaa Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sun, 16 Mar 2014 14:24:05 -0700 Subject: [PATCH 053/343] Updated gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index a4929d5..68f40e4 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,7 @@ */build/* */_build/* */dist/* +*.swp +*.pyc +*.DS_Store +*.pdf From 7ac6feb92f18563b448f0a2bef58bc6750846617 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sun, 16 Mar 2014 14:24:43 -0700 Subject: [PATCH 054/343] Added compare to sphinx docs --- doc/compare.rst | 1 + doc/index.rst | 1 + 2 files changed, 2 insertions(+) create mode 100644 doc/compare.rst diff --git a/doc/compare.rst b/doc/compare.rst new file mode 100644 index 0000000..dcd8cf8 --- /dev/null +++ b/doc/compare.rst @@ -0,0 +1 @@ +.. automodule:: macroeco.compare diff --git a/doc/index.rst b/doc/index.rst index 3828de6..ae9224e 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -13,6 +13,7 @@ Welcome to macroeco. empirical distributions2 + compare Indices and tables From 64eca2222de846a73733ec3b7315392960ee14ff Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sun, 16 Mar 2014 14:25:09 -0700 Subject: [PATCH 055/343] Added functions to compare and unittested --- macroeco/compare.py | 135 ++++++- macroeco/test_compare.py | 824 +++++++-------------------------------- 2 files changed, 273 insertions(+), 686 deletions(-) diff --git a/macroeco/compare.py b/macroeco/compare.py index fbf9ecd..f5c3be0 100644 --- a/macroeco/compare.py +++ b/macroeco/compare.py @@ -1,16 +1,25 @@ """ =========================== -Main (:mod:`macroeco.main`) +Compare (:mod:`macroeco.compare`) =========================== This module contains functions that compare the goodness of fit of a distribution/curve to data or the fit of two distributions/curves to each other. +Comparison Functions +==================== + .. 
autosummary:: :toctree: generated/ - main + get_AIC + get_AICC + get_AIC_weights + get_nll + get_empirical_cdf + get_sum_of_squares + """ @@ -18,9 +27,13 @@ import numpy as np import scipy.stats as stats +import pandas as pd from distributions import * +# NOTE: get_* functions usually refer to a method within a class. I would +# suggest dropping all of the get prefixes + def get_AIC(values, params): """ @@ -30,6 +43,56 @@ def get_AIC(values, params): L = get_nll(values) return 2*k + 2*L +def get_AICC(values, params): + """ + Calculate AICC given values of a pdf/pmf and a set of model parameters. + + Notes + ----- + Should be used when the number of observations is < 40. + + References + ---------- + .. [#] + Burnham, K and Anderson, D. (2002) Model Selection and Multimodel + Inference: A Practical and Information-Theoretic Approach (p. 66). New + York City, USA: Springer. + + """ + + k = len(params) # Num parameters + n = len(values) # Num observations + return get_AIC(values, params) + (2*k * (k + 1)) / (n - k - 1) + +def get_AIC_weights(aic_values): + """ + Calculates the aic_weights for a given set of models + + Parameters + ---------- + aic_values : array-like object + Array-like object containing AIC values from different models + + Returns + ------- + (weights, delta) : tuple + First element contains the relative AIC weights, second element + contains the delta AIC values. + + Notes + ----- + AIC weights can be interpreted as the probability that a given model is the + best model in comparison to the other models + """ + + aic_values = _to_arrays(aic_values)[0] + minimum = np.min(aic_values) + delta = aic_values - minimum + values = np.exp(-delta / 2) + weights = values / np.sum(values) + + return weights, delta + def get_nll(values): """ Calculate negative log likelihood from an array of pdf/pmf values. @@ -37,10 +100,68 @@ def get_nll(values): return -np.sum(np.log(values)) def get_empirical_cdf(data): + """ + Generates an empirical cdf from empirical data + + Parameters + ---------- + data : array-like object + Empirical data + + Returns + -------- + : array + The empirical cdf corresponding to the inputted data + + """ + + vals = pd.Series(data).value_counts() + ecdf = pd.DataFrame(data).set_index(keys=0) + probs = pd.DataFrame(vals.sort_index().cumsum() / np.float(len(data))) + ecdf = ecdf.join(probs) + + return np.array(ecdf[0]) + +class _gen_loss_function(object): + """ + Generic class for loss function between observed and predicted data + + """ + + def __init__(self, loss_fxn_str): + """ + Parameters + ---------- + loss_fxn_str : string + A Python string representing the loss function between observed + (obs) and predicted (pred). + + Ex. 
'np.abs(obs - pred)' or (obs - pred)**2 + """ + self.loss_fxn = loss_fxn_str + + def total_loss(self, obs, pred): + """ + Total loss for observed and predicted + + Parameters + ---------- + obs, pred : array-like objects + observed and predicted data + + Returns + ------- + : float + The sum of the loss function + """ - min, max = 0, np.ceil(np.max(data)) - x = np.arange(min, max+2) # x max is 1 above emp_result max - counts, _ = np.histogram(data, bins=x, normed=True) - emp_cdf = np.cumsum(counts) + obs, pred = _to_arrays(obs, pred) + return np.sum(eval(self.loss_fxn)) - return x[:-1], emp_cdf +get_sum_of_squares = _gen_loss_function('(obs - pred)**2').total_loss + +def _to_arrays(*args): + ''' + Converts all args to np.arrays + ''' + return tuple([np.array(ta) for ta in args]) diff --git a/macroeco/test_compare.py b/macroeco/test_compare.py index 507a6c5..5a5f120 100644 --- a/macroeco/test_compare.py +++ b/macroeco/test_compare.py @@ -1,8 +1,14 @@ #!/usr/bin/python +""" +Tests for compare module -#Testing Compare Module +""" +from __future__ import division + +from numpy.testing import (TestCase, assert_equal, assert_array_equal, + assert_almost_equal, assert_array_almost_equal, + assert_allclose, assert_, assert_raises) -import unittest from macroeco.compare import * import numpy as np import scipy.stats as stats @@ -10,703 +16,163 @@ import macroeco.distributions as dist import numpy.testing as nt -class TestCompare(unittest.TestCase): - '''Test classes and methods in compare.py''' - - def setUp(self): - self.sad_data = [[1,1,1,1,1,2,3,4,5,6], [2,2,2,2,2,2,2,2,2,2]] - self.ssad_data= [[0,0,0,1,1,2,3,5,12], (0,1,1,1,2,6,12)] - - - def test_CompareSAD_init(self): - - # Test that not passing in patch object object works - sad_c = CompareSAD(self.sad_data, ['logser']) - - # Check that sad_data became self.observed_data - sums = np.array([sum(x) for x in sad_c.observed_data]) - test_sums = np.array([sum(x) for x in self.sad_data]) - self.assertTrue(np.all(sums == test_sums)) - - # Test that that other attributes were set correctly - self.assertTrue(sad_c.criteria == None) - self.assertTrue(sad_c.sad_spp_list == None) - - # Test that distribution object was fit - self.assertTrue(np.all(sad_c.dist_list[0].params['tot_obs'] == - test_sums)) - self.assertTrue(np.all(sad_c.dist_list[0].params['n_samp'] == - np.array([10,10]))) - - # Test if patch is true! 
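A short usage sketch tying together the compare functions added above
(illustrative only; the data values are made up and it assumes ``macroeco``
is importable):

    import numpy as np
    import scipy.stats as stats
    from macroeco.compare import (get_AIC, get_AIC_weights,
                                  get_empirical_cdf, get_sum_of_squares)

    data = np.array([1, 1, 2, 3, 5, 8, 13])

    # Likelihoods of the data under two candidate models
    vals_exp = stats.expon.pdf(data, scale=data.mean())
    vals_norm = stats.norm.pdf(data, loc=data.mean(), scale=data.std())

    aic_exp = get_AIC(vals_exp, [data.mean()])                # 1-parameter model
    aic_norm = get_AIC(vals_norm, [data.mean(), data.std()])  # 2-parameter model
    weights, delta = get_AIC_weights([aic_exp, aic_norm])

    ecdf = get_empirical_cdf(data)              # empirical cdf at each observation
    ss = get_sum_of_squares(data, data.mean())  # loss against a constant prediction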
- - # Replica of patch output - patch_true = [({'test' : 'criteria'}, np.array([1,1,1,2,3,5]), - np.array(['a', 'b', 'c', 'd', 'e', 'g'])), ({'test' : - 'criteria'}, np.array([1,1,1,2,5]), np.array(['a', 'b', - 'c', 'd', 'g']))] - sad_c = CompareSAD(patch_true, dist_list=['logser'], patch=True) - - # Test that the parsing happened correctly - self.assertTrue(len(sad_c.criteria) == 2) - self.assertTrue(len(sad_c.sad_spp_list) == 2) - self.assertTrue(len(sad_c.observed_data) == 2) - - # Check that parameter values were fit correctly - self.assertTrue(np.all(sad_c.dist_list[0].params['n_samp'] == - np.array([6,5]))) - self.assertTrue(np.all(sad_c.dist_list[0].params['tot_obs'] == - np.array([13, 10]))) - - # Check that the species lists were set correctly - self.assertTrue(np.all(sad_c.sad_spp_list[0] == - np.array(['a', 'b', 'c', 'd', 'e', 'g']))) - self.assertTrue(np.all(sad_c.sad_spp_list[1] == - np.array(['a', 'b', 'c', 'd', 'g']))) - - def test_CompareSSAD_init(self): - - # Test that SSAD parses correctly when patch is False - # Test that not passing in patch object object works - ssad_c = CompareSSAD(self.ssad_data, ['binm']) - - # Check that sad_data became self.observed_data - sums = np.array([sum(x) for x in ssad_c.observed_data]) - test_sums = np.array([sum(x) for x in self.ssad_data]) - self.assertTrue(np.all(sums == test_sums)) - - # Test that that other attributes were set correctly - self.assertTrue(ssad_c.criteria == None) - self.assertTrue(ssad_c.sad_spp_list == None) - - # Test that distribution object was fit - self.assertTrue(np.all(ssad_c.dist_list[0].params['tot_obs'] == - test_sums)) - self.assertTrue(np.all(ssad_c.dist_list[0].params['n_samp'] == - np.array([9,7]))) - - # Test that ssad parses correctly if patch=True - ssad_patch = (np.array([{}, {}, {}, {}, {}]), {'spp1' : - np.array([0,0,1,2,4]), 'spp2' : np.array([1,1,1,1,1])}) - - ssad_c = CompareSSAD(ssad_patch, dist_list = ['tgeo', 'binm'], - patch=True) - - spp_list = np.array(['spp1', 'spp2']) - self.assertTrue(np.all(spp_list == np.sort(ssad_c.sad_spp_list))) - - # Test that distribution object was fit - self.assertTrue(np.all(ssad_c.dist_list[0].params['tot_obs'] == - np.array([7, 5]))) - self.assertTrue(np.all(ssad_c.dist_list[0].params['n_samp'] == - np.array([5,5]))) - # Test that distribution object was fit - self.assertTrue(np.all(ssad_c.dist_list[1].params['tot_obs'] == - np.array([7,5]))) - self.assertTrue(np.all(ssad_c.dist_list[1].params['n_samp'] == - np.array([5,5]))) - - self.assertTrue(len(ssad_c.criteria) == 5) - - def test_CompareIED_init(self): - - # Test the CompareIED init parses correctly - ied_data = [(np.arange(10,100), np.arange(1,40)), (np.arange(1,20), - np.arange(40,60))] - ied_c = CompareIED(ied_data, dist_list=['psi']) - - # Check the first item in tuple became observed data - sums = np.array([sum(x) for x in ied_c.observed_data]) - test_sums = np.array([sum(np.arange(10,100)), sum(np.arange(1,20))]) - self.assertTrue(np.all(sums == test_sums)) - - self.assertTrue(ied_c.criteria == None) - self.assertTrue(ied_c.sad_spp_list == None) - - # Test that distribution object was fit including E parameter - self.assertTrue(np.all(ied_c.dist_list[0].params['tot_obs'] == - np.array([sum(np.arange(1,40)), sum(np.arange(40,60))]))) - self.assertTrue(np.all(ied_c.dist_list[0].params['n_samp'] == - np.array([39,20]))) - self.assertTrue(np.all(ied_c.dist_list[0].params['E'] == - np.array([sum(np.arange(10,100)),sum(np.arange(1,20))]))) - - # If patch is True, make sure the fit works - patch_sad = 
[({'test' : 'criteria'}, np.array([1,1,1,2,3,5]), - np.array(['a', 'b', 'c', 'd', 'e', 'g'])), ({'test' : - 'criteria'}, np.array([1,1,1,2,5]), np.array(['a', 'b', - 'c', 'd', 'g']))] - - patch_ied = [({}, np.arange(1,40), np.repeat('a', 39)), ({}, - np.arange(1,30), np.repeat('b', 29))] - - ied_c = CompareIED((patch_ied, patch_sad), dist_list=['nu'], patch=True) - - # Check ied_list and spp_list - sad_spp = [np.array(['a', 'b', 'c', 'd', 'e', 'g']), - np.array(['a', 'b', 'c', 'd', 'g'])] - bools = [np.all(a == b) for a,b in zip(np.array(ied_c.sad_spp_list), - np.array(sad_spp))] - self.assertTrue(np.all(bools)) - - ied_spp = [np.repeat('a',39), np.repeat('b',29)] - bools = [np.all(a == b) for a,b in zip(ied_spp, ied_c.ied_spp_lists)] - self.assertTrue(np.all(bools)) - - # check criteria is right length - self.assertTrue(len(ied_c.criteria) == 2) - - # Check that observed data is correct - bools = [np.all(a == b) for a,b in zip(ied_c.observed_data, - [np.arange(1,40), np.arange(1,30)])] - self.assertTrue(np.all(bools)) - - # Check the fit of distribution - self.assertTrue(np.all(ied_c.dist_list[0].params['tot_obs'] == - np.array([13, 10]))) - self.assertTrue(np.all(ied_c.dist_list[0].params['n_samp'] == - np.array([6,5]))) - self.assertTrue(np.all(ied_c.dist_list[0].params['E'] == - np.array([sum(np.arange(1,40)),sum(np.arange(1,30))]))) - - def test_CompareSED_init(self): - - # Test that all attributes are set correctly (sed, ied, sad) - sed_data = [(np.arange(1,20), np.arange(1,40), np.arange(5,25)), - (np.arange(1,30), np.arange(5,30), np.arange(4,64))] - - sed_c = CompareSED(sed_data, dist_list=['theta']) - - # Did other attributes set correctly? - self.assertTrue(sed_c.criteria == None) - self.assertTrue(sed_c.sad_spp_list == None) - - # Check if observed sed data set correctly - test_obs = [np.arange(1,20), np.arange(1,30)] - bools = [np.all(a == b) for a,b in zip(sed_c.observed_data, test_obs)] - self.assertTrue(np.all(bools)) - - # Check that distribution fit correctly - self.assertTrue(np.all(sed_c.dist_list[0].params['tot_obs'] == - np.array([sum(np.arange(5,25)), sum(np.arange(4,64))]))) - self.assertTrue(np.all(sed_c.dist_list[0].params['n_samp'] == - np.array([len(np.arange(5,25)), len(np.arange(4,64))]))) - self.assertTrue(np.all(sed_c.dist_list[0].params['n'] == - np.array([len(np.arange(1,20)), len(np.arange(1,30))]))) - self.assertTrue(np.all(sed_c.dist_list[0].params['E'] == - np.array([sum(np.arange(1,40)), sum(np.arange(5,30))]))) - - # Test if patch == True - patch_sed = [({}, {'a' : np.arange(1,10), 'b' : np.arange(1,20), 'c': - np.arange(1,30), 'd' : np.arange(1,40)}), ({}, - {'a' : np.arange(1,10), 'b' : np.arange(1,20), 'c': - np.arange(1,30), 'd' : np.arange(1,40)})] - - patch_sad = [({}, np.arange(1,50), np.repeat('d',20))] - patch_ied = [({}, np.arange(4,67), np.repeat('y', 60))] - - # An error should be raised if sed,ied, and sad don't have the same - # length - self.assertRaises(IndexError, CompareSED, (patch_sed, patch_ied, - patch_sad), dist_list=['theta'], patch=True) - - - patch_sad = [({}, np.arange(1,50), np.repeat('d',20)), - ({}, np.arange(1,50), np.repeat('d',20))] - patch_ied = [({}, np.arange(4,67), np.repeat('y', 60)), - ({}, np.arange(4,67), np.repeat('y', 60))] - - sed_c = CompareSED((patch_sed, patch_ied, patch_sad), - dist_list=['theta'], patch=True) - - # Check that observed data is set correctly - self.assertTrue(len(sed_c.observed_data) == 8) - test_obs = [np.arange(1,10), np.arange(1,20), np.arange(1,30), - np.arange(1,40)] - test_obs += 
test_obs - bools = [np.all(a == b) for a,b in zip(test_obs, sed_c.observed_data)] - self.assertTrue(np.all(bool)) - - # Check distributions fit correctly - nt.assert_array_equal(sed_c.dist_list[0].params['n'], np.array([9, - 19, 29, 39, 9, 19, 29, 39])) - nt.assert_array_equal(sed_c.dist_list[0].params['E'], - np.repeat(sum(np.arange(4,67)), 8)) - nt.assert_array_equal(sed_c.dist_list[0].params['tot_obs'], - np.repeat(sum(np.arange(1,50)), 8)) - nt.assert_array_equal(sed_c.dist_list[0].params['n_samp'], - np.repeat(len(np.arange(1,50)), 8)) - - # Check that the species list is correct - nt.assert_array_equal(np.array(['a', 'b', 'c', 'd', 'a', 'b', 'c', - 'd']), np.array(sed_c.sad_spp_list)) - - # Check that criteria is correct length - self.assertTrue(len(sed_c.criteria) == 8) - - def test_CompareASED_init(self): - - # Test that ased fits correctly - - ased_data = [(np.arange(1,10), np.arange(4,56), np.arange(1,20)), - (np.arange(1,34), np.arange(3,20), np.arange(1,56))] - - ased_c = CompareASED(ased_data, dist_list=['nu']) - - # Did other attributes set correctly? - self.assertTrue(ased_c.criteria == None) - self.assertTrue(ased_c.sad_spp_list == None) - - # Check if observed ased data set correctly - test_obs = [np.arange(1,10), np.arange(1,34)] - bools = [np.all(a == b) for a,b in zip(ased_c.observed_data, test_obs)] - self.assertTrue(np.all(bools)) - - # Check that distribution fit correctly - self.assertTrue(np.all(ased_c.dist_list[0].params['tot_obs'] == - np.array([sum(np.arange(1,20)), sum(np.arange(1,56))]))) - self.assertTrue(np.all(ased_c.dist_list[0].params['n_samp'] == - np.array([len(np.arange(1,20)), len(np.arange(1,56))]))) - self.assertTrue(np.all(ased_c.dist_list[0].params['E'] == - np.array([sum(np.arange(4,56)), sum(np.arange(3,20))]))) - - # Test if patch == True - patch_ased = [({}, np.arange(1,50), np.repeat('d',20)), - ({}, np.arange(1,50), np.repeat('e',20))] - patch_sad = [({}, np.arange(1,50), np.repeat('d',20)), - ({}, np.arange(1,50), np.repeat('e',20))] - patch_ied = [({}, np.arange(4,67), np.repeat('y', 60)), - ({}, np.arange(4,67), np.repeat('y', 60))] - - ased_c = CompareASED((patch_ased, patch_ied, patch_sad), - dist_list=['nu'], patch=True) - - # Test that species list is correct - test_spp = [np.repeat('d', 20), np.repeat('e', 20)] - nt.assert_array_equal(test_spp, ased_c.sad_spp_list) - - # Test that observed data is correct - nt.assert_array_equal(ased_c.observed_data, [np.arange(1,50), - np.arange(1,50)]) - - # Test that fit distribution is correct - nt.assert_array_equal(ased_c.dist_list[0].params['tot_obs'], - np.array([1225, 1225])) - nt.assert_array_equal(ased_c.dist_list[0].params['n_samp'], - np.array([49, 49])) - nt.assert_array_equal(ased_c.dist_list[0].params['E'], - np.array([sum(np.arange(4,67)), - sum(np.arange(4,67))])) - - def test_CompareSAR(self): - - # Test if patch == False - area_list = [(np.arange(1,10), np.arange(9,18)), (np.arange(1,10), - np.arange(9,18))] - - full_sad = [np.arange(1,40), np.arange(1,60)] - - sar_c = CompareSAR(area_list, ['mete_sar_iter', 'logser-binm'], - full_sad) - - # Max area should be 1 - nt.assert_array_equal(np.array([1,1]), np.array([np.max(a) for a in - sar_c.a_list])) - - sar_c = CompareSAR(area_list, ['mete_sar_iter', 'logser-binm'], - full_sad, max_a=False) - - # Max area should be 9 - nt.assert_array_equal(np.array([9,9]), np.array([np.max(a) for a in - sar_c.a_list])) - - # Check species numbers - bools = [np.all(a == b) for a,b in zip(sar_c.sar_list, - [np.arange(9,18), np.arange(9,18)])] - 
self.assertTrue(np.all(bools)) - - # Test if patch == True - - rec_sar = np.array(zip(np.arange(1,8), np.arange(4,11)), - dtype=[('items', np.float), ('area', np.float)]) - - sar_c = CompareSAR([(rec_sar, [])], ['mete_sar_iter'], - [np.arange(1,50)], max_a=False, patch=True) - - # check species numbers - nt.assert_array_equal(np.arange(1,8), sar_c.sar_list[0]) - - # Check area numbers - nt.assert_array_equal(np.arange(4,11), sar_c.a_list[0]) - - # check that error is thrown if curve is bad - self.assertRaises(NameError, CompareSAR, [(rec_sar, [])], ['logser_binm'], - [np.arange(1,50)], max_a=False, patch=True) - - # Test compare_curves method - sar_c = CompareSAR([(rec_sar, [])], ['logser-binm'], - [np.arange(1,50)], patch=True) - - # Test with iter_val=False and use_rad=False and all combos - sar_c.compare_curves() - sar_c.compare_curves(use_rad=True) - sar_c.compare_curves(iter_vals=True, use_rad=False) - sar_c.compare_curves(iter_vals=True, use_rad=True) - - def test_compare_mse(self): - - sad_c = CompareSAD(self.sad_data, ['logser', 'lognorm']) - - # Test that mse output has the appropriate formatted data - mse = sad_c.compare_mse(mse_base='cdf') - self.assertTrue(len(mse) == 2) - self.assertTrue(len(mse['lognorm']) == 2 and len(mse['logser']) == 2) - - # Test the same thing for a rad base - mse = sad_c.compare_mse(mse_base='rad') - self.assertTrue(len(mse) == 2) - self.assertTrue(len(mse['lognorm']) == 2 and len(mse['logser']) == 2) - - # Test is the the distribution has no cdf MSE is set to NaN - sad_c = CompareSAD(self.sad_data, ['logser', 'sugihara']) - mse = sad_c.compare_mse(mse_base='cdf') - self.assertTrue(np.all(np.isnan(mse['sugihara']))) - - # Test that is works for if base = 'rad' - sad_c = CompareSAD(self.sad_data, ['logser', 'sugihara']) - mse = sad_c.compare_mse(mse_base='rad') - self.assertTrue(type(mse['sugihara'][0] == np.float)) - - # Test that compare mse works with ssads - ssad_c = CompareSSAD(self.ssad_data, ['binm', 'tgeo']) - # Test that mse output has the appropriate formatted data - mse = ssad_c.compare_mse(mse_base='cdf') - self.assertTrue(len(mse) == 2) - self.assertTrue(len(mse['binm']) == 2 and len(mse['tgeo']) == 2) - - # Test the same thing for a rad base - mse = ssad_c.compare_mse(mse_base='rad') - self.assertTrue(len(mse) == 2) - self.assertTrue(len(mse['binm']) == 2 and len(mse['tgeo']) == 2) - - def test_compare_rad_cdf(self): - - sad_c = CompareSAD(self.sad_data, ['logser']) - - tdist_list = copy.copy(sad_c.dist_list) - sad_c.dist_list = [] - - # Check that rad, cdf work with empty dist list - rads = sad_c.compare_rads() - cdfs = sad_c.compare_cdfs() - self.assertTrue(len(rads) == 1 and len(cdfs) == 1) - self.assertTrue('observed' in rads and 'observed' in cdfs) - self.assertTrue(rads == sad_c.rads) - self.assertTrue(cdfs == sad_c.cdfs) - - # Check that rad, cdf work with something in dist_list - sad_c.dist_list = tdist_list - sad_c.rads = None - sad_c.cdfs = None - rads = sad_c.compare_rads() - cdfs = sad_c.compare_cdfs() - self.assertTrue(len(rads) == 2 and len(cdfs) == 2) - self.assertTrue('observed' in rads and 'logser' in rads) - self.assertTrue('observed' in cdfs and 'logser' in cdfs) - self.assertTrue(rads == sad_c.rads) - self.assertTrue(cdfs == sad_c.cdfs) - - # Check that if dist doesn't have cdf empty arrays are returned - sad_c = CompareSAD(self.sad_data, ['logser', 'sugihara']) - cdfs = sad_c.compare_cdfs() - self.assertTrue(len(cdfs['sugihara']) == 2) - self.assertTrue(len(cdfs['sugihara'][0]) == 0 and - len(cdfs['sugihara'][1]) == 0) 
- - # check that observed rads are in the right order - true_vals = np.array([np.all(x == np.array(y)) for x,y in - zip(rads['observed'], self.sad_data)]) - - self.assertTrue(np.all(true_vals)) - - # Testing that SED object returns a species list in compare_rads - patch_sed = [({}, {'a' : np.arange(1,10), 'b' : np.arange(1,20), 'c': - np.arange(1,30), 'd' : np.arange(1,40)}), ({}, - {'a' : np.arange(1,10), 'b' : np.arange(1,20), 'c': - np.arange(1,30), 'd' : np.arange(1,40)})] - - patch_sad = [({}, np.arange(1,50), np.repeat('d',20)), - ({}, np.arange(1,50), np.repeat('d',20))] - patch_ied = [({}, np.arange(4,67), np.repeat('y', 60)), - ({}, np.arange(4,67), np.repeat('y', 60))] - - sed_c = CompareSED((patch_sed, patch_ied, patch_sad), - dist_list=['theta'], patch=True) - - # Both returns should have a species list - cdfs = sed_c.compare_rads(return_spp=True) - rads = sed_c.compare_cdfs(return_spp=True) - nt.assert_array_equal(np.array(['a', 'b', 'c', 'd', 'a', 'b', 'c', - 'd']), np.array(cdfs[1])) - nt.assert_array_equal(np.array(['a', 'b', 'c', 'd', 'a', 'b', 'c', - 'd']), np.array(cdfs[1])) - nt.assert_array_equal(np.array(['a', 'b', 'c', 'd', 'a', 'b', 'c', - 'd']), np.array(rads[1])) - - - def test_compare_aic(self): - - - # Add another distribution and check the order of the AIC output - sad_c = CompareSAD(self.sad_data, ['logser', 'most_even', 'nbd_lt']) - - aic_out = sad_c.compare_aic(crt=True) - print aic_out - - # Most even should have the lowest AIC value for the second dataset - self.assertTrue(aic_out[1][1] == np.min(aic_out[1])) - - aic_m = sad_c.compare_aic_measures(crt=True) - - # Most even should have the a zero delta AIC for the second dataset - self.assertTrue(aic_m[1][1][1] == np.min(aic_m[1][1])) - - # Most even should have the highest wieght for the second dataset - self.assertTrue(aic_m[0][1][1] == np.max(aic_m[0][1])) - - # if I don't have any distributions I should get three empty lists for - # compare_aic_measures - sad_c = CompareSAD(self.sad_data, []) - aic_m = sad_c.compare_aic_measures(crt=True) - self.assertTrue(aic_m == ([],[],[])) - - # If distribution that is passed doesn't have a pmf of pdf, check inf - # aic values are returned - sad_c = CompareSAD(self.sad_data, ['logser', 'sugihara']) - aic_m = sad_c.compare_aic_measures() - self.assertTrue(aic_m[2][0][1] == np.inf and aic_m[2][1][1] == np.inf) - - def test_compare_LRT(self): - - # Testing compare LRT with logser null model - sad_c = CompareSAD(self.sad_data, ['nbd_lt']) - - # Is output properly formatted? 
- lrt_out = sad_c.compare_LRT(dist.logser()) - self.assertTrue(len(lrt_out) == 1 and 'logser, nbd_lt' in lrt_out) - - def test_compare_rarity(self): - - #Test compare_rarity - - sad_c = CompareSAD(self.sad_data, ['logser', 'most_even', 'nbd_lt']) - rare = sad_c.compare_rarity(1) - - # Observed should have 5 - self.assertTrue(rare['observed'][1][0] == 5) - - # Most even should have 10 species <= 2 - rare = sad_c.compare_rarity((1,2)) - self.assertTrue(rare['observed'][1][0] == 5) - self.assertTrue(rare['most_even'][2][1] == 10) - - def test_compare_moments(self): - - # Test the compare_moments output is formatted correctly - sad_c = CompareSAD(self.sad_data, ['logser', 'nbd_lt']) - mom = sad_c.compare_moments() - self.assertTrue(len(mom) == 3) - - # Test that observed and all distributions are considered - lengths = np.array([len(mom[x]) for x in mom.iterkeys()]) - - self.assertTrue(np.array_equal(lengths, np.repeat(3, 3))) - - def test_summary(self): - - # Test that summary output is correct - # Test is there are no dists in dist_list - sad_c = CompareSAD(self.sad_data, []) - sumry = sad_c.summary() - # Test that there is only observed in summary dict - self.assertTrue(len(sumry) == 1 and 'observed' in sumry) - - # Test if we have two distributions but one doesn't have a cdf - sad_c = CompareSAD(self.sad_data, ['logser', 'sugihara']) - smry = sad_c.summary() - self.assertTrue(len(smry) == 3) - - # Logseries dict and sugihara dict should have 9 kw - self.assertTrue(len(smry['logser']) == 9 and len(smry['sugihara']) == - 9) - - # AIC values for sugihara should in inf - self.assertTrue(np.all(smry['sugihara']['aic'] == np.array([np.inf, - np.inf]))) - # IED should be able to call summary - ied_data = [(np.arange(10,100), np.arange(1,40)), (np.arange(1,20), - np.arange(40,60))] - ied_c = CompareIED(ied_data, dist_list=['psi']) - smry = ied_c.summary() - self.assertTrue(smry['observed']['balls'] == [4905, 190]) +class TestCompare(TestCase): + '''Test Methods in compare.py''' def test_nll(self): # Test against R result: sum(dnorm(c(1,2,3,4,5), log=TRUE)) R_res = 32.09469 test_vals = stats.norm.pdf((1,2,3,4,5)) - lglk = nll([test_vals])[0] - self.assertTrue(R_res == np.round(lglk, decimals=5)) + lglk = get_nll(test_vals) + assert_equal(R_res, np.round(lglk, decimals=5)) def test_empirical_cdf(self): #Test against R's ecdf function + + # Test Case 1 test_data = [1,1,1,1,2,3,4,5,6,6] R_res = [.4,.4,.4,.4,.5,.6,.7,.8,1,1] - res = empirical_cdf(test_data) - self.assertTrue(np.array_equal(R_res, res)) + res = get_empirical_cdf(test_data) + assert_array_equal(R_res, res) + # Test Case 2 test_data = [3,3,3,3] R_res = [1,1,1,1] - res = empirical_cdf(test_data) - self.assertTrue(np.array_equal(R_res, res)) + res = get_empirical_cdf(test_data) + assert_array_equal(R_res, res) def test_aic(self): - # Test that passing either a pmf of nll gives the same result test_vals = stats.norm.pdf((1,2,3,4,5,6,7,8)) - aic1 = aic([test_vals], 2, loglik=False) - aic2 = aic(nll([test_vals]), 2, loglik=True) + aic1 = get_AIC(test_vals, (1,1)) + expected = 222.703016531 # Calculated by hand + assert_equal(np.round(aic1, decimals=9), expected) - self.assertTrue(aic1[0] == aic2[0]) - # Expected AIC for test_vals - expected = 6.837877066 # Calculated by hand - self.assertTrue(np.round(aic1[0], decimals=9), expected) test_vals = stats.gamma.pdf((1,1,1,4,5,7,12),2) - aic1 = aic([test_vals], 2, loglik=False) + aic1 = get_AIC(test_vals, (1,1)) expected = 51.146902 - self.assertTrue(np.round(aic1[0], decimals=6), expected) - - def 
test_aicc(self): - - # Test that passing either a pmf of nll gives the same result - test_vals = stats.norm.pdf((1,2,3,4,5,6,7,8)) - aic1 = aicc([test_vals], 2, loglik=False) - aic2 = aicc(nll([test_vals]), 2, 8, loglik=True) - - self.assertTrue(aic1[0] == aic2[0]) - - # Test that aicc gives the correct values - expected = 225.10302 - self.assertTrue(expected == np.round(aic1[0], decimals=5)) - - # Test Assertion error is thrown if no n param - self.assertRaises(AssertionError, aicc, 56, 2) - - - def test_aic_weights(self): - - vals = [1,1,1,2,3,4,7,23,78] - aic_vals = aicc([stats.norm.pdf(vals, scale=100), stats.norm.pdf(vals, - scale=99)], - [2,2],loglik=False) - aicw, delta_aic = aic_weights(aic_vals) - pred = np.array([ 0.47909787, 0.52090213]) - self.assertTrue(np.array_equal(np.round(aicw, decimals=8), pred)) - - - def test_ks_two_sample(self): - # Unittested in scipy, testing that this function works - - d, p = ks_two_sample([1,1,2,3,4,5,6,12], [1,2,3,4,5,5,5,5,5,7,8,9]) - - def test_likelihood_ratio(self): - - # Test against what the lrtest() R function returns - model1 = 158.0494 - model0 = 139.806 - R_chisquare = 36.4868 - R_p = 1.537e-09 - - pred_chi, pred_p = likelihood_ratio(model0, model1, 1)[0] - - self.assertTrue(np.round(pred_chi, decimals=4) == R_chisquare) - pred_p = np.round(pred_p, decimals=12) - self.assertTrue(pred_p == R_p) - - - def test_variance(self): - - # Test that I get back the correct values - data = [[0,1,2,3,4,45,18,56,24,56], [1,1,1,1,56,78,23,23]] - expt = [] - expt.append(np.var(data[0], ddof=1)) - expt.append(np.var(data[1], ddof=1)) - resulting_vals = variance(data) - self.assertTrue(np.array_equal(np.array(expt), - np.array(resulting_vals))) - # Using np.var which is optimized and unittested - - def test_skew(self): - - # Using the scipy.stats definition which is optimized and unittested - data = [[0,1,2,3,4,45,18,56,24,56], [1,1,1,1,56,78,23,23]] - expt = [] - expt.append(stats.skew(data[0])) - expt.append(stats.skew(data[1])) - resulting_vals = skew(data) - self.assertTrue(np.array_equal(np.array(expt), - np.array(resulting_vals))) - - def test_kurtosis(self): - - # Using the scipy.stats definition which is optimized and unittested - data = [[0,1,2,3,4,45,18,56,24,56], [1,1,1,1,56,78,23,23]] - expt = [] - expt.append(stats.kurtosis(data[0])) - expt.append(stats.kurtosis(data[1])) - resulting_vals = kurtosis(data) - self.assertTrue(np.array_equal(np.array(expt), - np.array(resulting_vals))) - - def test_mean_square_error(self): - - # Test against R mse function - pred = np.arange(1,9) - obs = np.arange(7, 15) - - comp_val = 36 - pred = mean_squared_error(pred, obs) - self.assertEqual(pred, comp_val) - - def test_bootstrap_moment(self): - - data1 = np.arange(1, 31) - data2 = np.arange(20, 50) - # Test the return is empty if wrong keyword is given - bs_vals = bootstrap_moment(data1, data2, ['men', 'vaiance', - 'sew', 'kurtoss'], num_samp=100) - - self.assertTrue(len(bs_vals) == 0) - - # Test bootstrap moment against William Rice's (UCSB) bootstrap - # programs in Statistics 101. 
Just testing the mean, but the - # implementation is the same for all of them - test_ci = np.array([-23.4, -14.6]) - - bs_vals = bootstrap_moment(data1, data2, ['mean', 'variance', - 'skew', 'kurtosis'], num_samp=50000) - - # Check that Bill Rice's and our 95% CIs match - self.assertTrue(np.array_equal(test_ci, np.round(bs_vals['mean'][1], - decimals=1))) - - # Check that the deltas match - self.assertTrue(-19 == bs_vals["mean"][0]) - - # Check that the length is right - self.assertTrue(len(bs_vals) == 4) - -if __name__ == '__main__': - unittest.main() + assert_equal(np.round(aic1, decimals=6), expected) + +# def test_aicc(self): +# +# # Test that passing either a pmf of nll gives the same result +# test_vals = stats.norm.pdf((1,2,3,4,5,6,7,8)) +# aic1 = aicc([test_vals], 2, loglik=False) +# aic2 = aicc(nll([test_vals]), 2, 8, loglik=True) +# +# self.assertTrue(aic1[0] == aic2[0]) +# +# # Test that aicc gives the correct values +# expected = 225.10302 +# self.assertTrue(expected == np.round(aic1[0], decimals=5)) +# +# # Test Assertion error is thrown if no n param +# self.assertRaises(AssertionError, aicc, 56, 2) +# +# +# def test_aic_weights(self): +# +# vals = [1,1,1,2,3,4,7,23,78] +# aic_vals = aicc([stats.norm.pdf(vals, scale=100), stats.norm.pdf(vals, +# scale=99)], +# [2,2],loglik=False) +# aicw, delta_aic = aic_weights(aic_vals) +# pred = np.array([ 0.47909787, 0.52090213]) +# self.assertTrue(np.array_equal(np.round(aicw, decimals=8), pred)) +# +# +# def test_ks_two_sample(self): +# # Unittested in scipy, testing that this function works +# +# d, p = ks_two_sample([1,1,2,3,4,5,6,12], [1,2,3,4,5,5,5,5,5,7,8,9]) +# +# def test_likelihood_ratio(self): +# +# # Test against what the lrtest() R function returns +# model1 = 158.0494 +# model0 = 139.806 +# R_chisquare = 36.4868 +# R_p = 1.537e-09 +# +# pred_chi, pred_p = likelihood_ratio(model0, model1, 1)[0] +# +# self.assertTrue(np.round(pred_chi, decimals=4) == R_chisquare) +# pred_p = np.round(pred_p, decimals=12) +# self.assertTrue(pred_p == R_p) +# +# +# def test_variance(self): +# +# # Test that I get back the correct values +# data = [[0,1,2,3,4,45,18,56,24,56], [1,1,1,1,56,78,23,23]] +# expt = [] +# expt.append(np.var(data[0], ddof=1)) +# expt.append(np.var(data[1], ddof=1)) +# resulting_vals = variance(data) +# self.assertTrue(np.array_equal(np.array(expt), +# np.array(resulting_vals))) +# # Using np.var which is optimized and unittested +# +# def test_skew(self): +# +# # Using the scipy.stats definition which is optimized and unittested +# data = [[0,1,2,3,4,45,18,56,24,56], [1,1,1,1,56,78,23,23]] +# expt = [] +# expt.append(stats.skew(data[0])) +# expt.append(stats.skew(data[1])) +# resulting_vals = skew(data) +# self.assertTrue(np.array_equal(np.array(expt), +# np.array(resulting_vals))) +# +# def test_kurtosis(self): +# +# # Using the scipy.stats definition which is optimized and unittested +# data = [[0,1,2,3,4,45,18,56,24,56], [1,1,1,1,56,78,23,23]] +# expt = [] +# expt.append(stats.kurtosis(data[0])) +# expt.append(stats.kurtosis(data[1])) +# resulting_vals = kurtosis(data) +# self.assertTrue(np.array_equal(np.array(expt), +# np.array(resulting_vals))) +# +# def test_mean_square_error(self): +# +# # Test against R mse function +# pred = np.arange(1,9) +# obs = np.arange(7, 15) +# +# comp_val = 36 +# pred = mean_squared_error(pred, obs) +# self.assertEqual(pred, comp_val) +# +# def test_bootstrap_moment(self): +# +# data1 = np.arange(1, 31) +# data2 = np.arange(20, 50) +# # Test the return is empty if wrong keyword is given 
+# bs_vals = bootstrap_moment(data1, data2, ['men', 'vaiance', +# 'sew', 'kurtoss'], num_samp=100) +# +# self.assertTrue(len(bs_vals) == 0) +# +# # Test bootstrap moment against William Rice's (UCSB) bootstrap +# # programs in Statistics 101. Just testing the mean, but the +# # implementation is the same for all of them +# test_ci = np.array([-23.4, -14.6]) +# +# bs_vals = bootstrap_moment(data1, data2, ['mean', 'variance', +# 'skew', 'kurtosis'], num_samp=50000) +# +# # Check that Bill Rice's and our 95% CIs match +# self.assertTrue(np.array_equal(test_ci, np.round(bs_vals['mean'][1], +# decimals=1))) +# +# # Check that the deltas match +# self.assertTrue(-19 == bs_vals["mean"][0]) +# +# # Check that the length is right +# self.assertTrue(len(bs_vals) == 4) +# From 3b42fd26669f9a785bcd8b70ab2c35ea0105d25f Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Mon, 17 Mar 2014 10:53:53 -0700 Subject: [PATCH 056/343] Added r-squared and additional unittests --- macroeco/compare.py | 83 +++++++++++++++++++++++++++++++++++----- macroeco/test_compare.py | 76 +++++++++++++++++++++++------------- 2 files changed, 123 insertions(+), 36 deletions(-) diff --git a/macroeco/compare.py b/macroeco/compare.py index f5c3be0..2191221 100644 --- a/macroeco/compare.py +++ b/macroeco/compare.py @@ -26,6 +26,7 @@ from __future__ import division import numpy as np +import scipy as sp import scipy.stats as stats import pandas as pd @@ -35,10 +36,19 @@ # suggest dropping all of the get prefixes +def get_nll(values): + """ + Calculate negative log likelihood from an array of pdf/pmf values. + """ + + values = _to_arrays(values)[0] + return -np.sum(np.log(values)) + def get_AIC(values, params): """ Calculate AIC given values of a pdf/pmf and a set of model parameters. """ + values, params = _to_arrays(values, params) k = len(params) # Num parameters L = get_nll(values) return 2*k + 2*L @@ -59,7 +69,8 @@ def get_AICC(values, params): York City, USA: Springer. """ - + + values, params = _to_arrays(values, params) k = len(params) # Num parameters n = len(values) # Num observations return get_AIC(values, params) + (2*k * (k + 1)) / (n - k - 1) @@ -93,11 +104,6 @@ def get_AIC_weights(aic_values): return weights, delta -def get_nll(values): - """ - Calculate negative log likelihood from an array of pdf/pmf values. - """ - return -np.sum(np.log(values)) def get_empirical_cdf(data): """ @@ -122,7 +128,7 @@ def get_empirical_cdf(data): return np.array(ecdf[0]) -class _gen_loss_function(object): +class gen_loss_function(object): """ Generic class for loss function between observed and predicted data @@ -136,7 +142,12 @@ def __init__(self, loss_fxn_str): A Python string representing the loss function between observed (obs) and predicted (pred). - Ex. 'np.abs(obs - pred)' or (obs - pred)**2 + Notes + ----- + + Ex. 
'np.abs(obs - pred)' or '(obs - pred)**2' + + """ self.loss_fxn = loss_fxn_str @@ -158,10 +169,62 @@ def total_loss(self, obs, pred): obs, pred = _to_arrays(obs, pred) return np.sum(eval(self.loss_fxn)) -get_sum_of_squares = _gen_loss_function('(obs - pred)**2').total_loss +get_sum_of_squares = gen_loss_function('(obs - pred)**2').total_loss + +def get_r_squared(obs, pred): + """ + + Get's the R^2 value for a regression of observed data (X) and predicted (Y) + + Parameters + ---------- + obs, pred : array-like objects + + Returns + ------- + : float + The R**2 value for the regression of + + """ + + b0, b1, r, p_value, se = stats.linregress(obs, pred) + return r**2 + +def get_ks_two_sample(): + """ + Two sample Kolmogorov Smirnov distribution. Uses the cumulative + distribution functions to test whether two samples were drawn from the same + continuous distribution. Can be a decent approxmiation for discrete data + (CHECK THIS), but the chi-squared test may be more appropriate. + + """ + + pass + +def get_ks_one_sample(): + pass + +def get_lrt(): + pass + +def get_bayes_factor(): + pass + +def get_chi_squared(): + pass + +def bin_data(): + pass + + + + + + def _to_arrays(*args): ''' Converts all args to np.arrays ''' - return tuple([np.array(ta) for ta in args]) + return tuple([np.array(ta) if np.iterable(ta) else np.array([ta]) for ta in + args]) diff --git a/macroeco/test_compare.py b/macroeco/test_compare.py index 5a5f120..89bb8a0 100644 --- a/macroeco/test_compare.py +++ b/macroeco/test_compare.py @@ -56,32 +56,56 @@ def test_aic(self): expected = 51.146902 assert_equal(np.round(aic1, decimals=6), expected) -# def test_aicc(self): -# -# # Test that passing either a pmf of nll gives the same result -# test_vals = stats.norm.pdf((1,2,3,4,5,6,7,8)) -# aic1 = aicc([test_vals], 2, loglik=False) -# aic2 = aicc(nll([test_vals]), 2, 8, loglik=True) -# -# self.assertTrue(aic1[0] == aic2[0]) -# -# # Test that aicc gives the correct values -# expected = 225.10302 -# self.assertTrue(expected == np.round(aic1[0], decimals=5)) -# -# # Test Assertion error is thrown if no n param -# self.assertRaises(AssertionError, aicc, 56, 2) -# -# -# def test_aic_weights(self): -# -# vals = [1,1,1,2,3,4,7,23,78] -# aic_vals = aicc([stats.norm.pdf(vals, scale=100), stats.norm.pdf(vals, -# scale=99)], -# [2,2],loglik=False) -# aicw, delta_aic = aic_weights(aic_vals) -# pred = np.array([ 0.47909787, 0.52090213]) -# self.assertTrue(np.array_equal(np.round(aicw, decimals=8), pred)) + def test_aicc(self): + + # Test values + test_vals = stats.norm.pdf((1,2,3,4,5,6,7,8)) + aic1 = get_AICC(test_vals, (1,1)) + + # Test that aicc gives the correct values + expected = 225.10302 + assert_equal(expected, np.round(aic1, decimals=5)) + + def test_aic_weights(self): + + # Test values + vals = [1,1,1,2,3,4,7,23,78] + values = [stats.norm.pdf(vals, scale=100), stats.norm.pdf(vals, + scale=99)] + + aic_vals = [get_AICC(tval, 1) for tval in values] + aicw, delta_aic = get_AIC_weights(aic_vals) + pred = np.array([ 0.47909787, 0.52090213]) + assert_array_almost_equal(aicw, pred) + + def test_gen_loss_function(self): + + # Test absolute value loss function + loss_fxn = 'np.abs(obs - pred)' + loss = gen_loss_function(loss_fxn) + + obs = np.random.randint(3, 59, 100) + pred = np.random.randint(3, 59, 100) + test_loss = np.sum(np.abs(obs - pred)) + + pred_loss = loss.total_loss(obs, pred) + assert_equal(pred_loss, test_loss) + + # Test sum of squares loss function + test_loss = np.sum((obs - pred)**2) + pred_loss = get_sum_of_squares(obs, 
pred) + assert_equal(test_loss, pred_loss) + + # Test MSE loss function + loss_fxn = 'np.abs(obs - pred) / len(obs)' + loss = gen_loss_function(loss_fxn) + + test_loss = np.sum(np.abs(obs - pred) / len(obs)) + pred_loss = loss.total_loss(obs, pred) + assert_equal(test_loss, pred_loss) + + + # # # def test_ks_two_sample(self): From f4be4111c7570f9ad181bb75edbc3ba4169e4263 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Mon, 17 Mar 2014 15:31:17 -0700 Subject: [PATCH 057/343] Added chi-squared test --- macroeco/compare.py | 66 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 5 deletions(-) diff --git a/macroeco/compare.py b/macroeco/compare.py index 2191221..d49d660 100644 --- a/macroeco/compare.py +++ b/macroeco/compare.py @@ -210,18 +210,74 @@ def get_lrt(): def get_bayes_factor(): pass -def get_chi_squared(): - pass +def get_chi_squared(dists): + """ + Chi-squared test to compare two or more distributions. -def bin_data(): - pass + Parameters + ------------------ + dists : list + List of distributions to compare. Each distribution in list should be + the same length and the location of each value in a list should be + compareable. This list will be made into a Chi-Squared contingency + table to analyze. + + Returns + ------------ + chi2 : float + The test statistic. + p : float + The p-value of the test + dof : int + Degrees of freedom + expected : ndarray, same shape as `observed` + The expected frequencies, based on the marginal sums of the table. + Notes + --------- + Assumption of the Chi-squared test is that the expected value of 80% of + the cells is > 5. If this does not hold, the Normal approximation is not + valid and you should try an alternative approach. + """ + assert len(dists) > 1, "Length of dists must be greater than 1" + test_len = len(dists[0]) + assert np.all([len(dt) == test_len for dit in dists], "All dists must have" + + " equal length" + chi_table = np.array(dists, dtype=np.float) + chi2, p, dof, expected = stats.chi2_contingency(chi_table, correction=False) + return chi2, p, dof, expected + +def bin_data(data, max_num): + """ + Bins the data on base 2. Bins such that the right boundary is exlusive and + the left boundary is inclusive. Does not split density between bins. + + Parameters + ------------------ + data : array-like + Data to be binned + + max_num : float + The maximum upper most boundary of the data + + base : float + The base for log binning + + Returns + ------------ + tuple : (binned_data, bins_edges) + + """ + log_ub = np.ceil(np.log2(max_num)) + boundaries = 2**np.arange(0, log_ub + 1) + + hist_data = np.histogram(data, bins=boundaries) + return hist_data - def _to_arrays(*args): ''' Converts all args to np.arrays From 2d8a58380d00c1c34ef89af4607456dfa28540dc Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 18 Mar 2014 19:41:48 -0700 Subject: [PATCH 058/343] Unittested all current functions in compare.py --- macroeco/compare.py | 70 +++++++++++++++++---------- macroeco/test_compare.py | 100 +++++++++++++++++++++++++++++++-------- 2 files changed, 126 insertions(+), 44 deletions(-) diff --git a/macroeco/compare.py b/macroeco/compare.py index d49d660..c5db2a5 100644 --- a/macroeco/compare.py +++ b/macroeco/compare.py @@ -4,7 +4,7 @@ =========================== This module contains functions that compare the goodness of fit of a -distribution/curve to data or the fit of two distributions/curves to each +distribution/curve to data or the fit of two distributions/curves to each other. 
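For illustration, a minimal sketch of one such comparison using the AIC helpers in this module (get_AICC, get_AIC_weights); the data values and the two scipy candidate models below are arbitrary stand-ins:

    import numpy as np
    import scipy.stats as stats

    data = np.array([1, 1, 2, 3, 4, 7, 23, 78])
    vals_norm = stats.norm.pdf(data, scale=30)    # likelihoods under candidate model 1
    vals_expon = stats.expon.pdf(data, scale=15)  # likelihoods under candidate model 2

    aics = [get_AICC(vals_norm, (1, 1)), get_AICC(vals_expon, (1,))]
    weights, delta = get_AIC_weights(aics)        # relative support for each model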
Comparison Functions @@ -44,6 +44,7 @@ def get_nll(values): values = _to_arrays(values)[0] return -np.sum(np.log(values)) + def get_AIC(values, params): """ Calculate AIC given values of a pdf/pmf and a set of model parameters. @@ -51,7 +52,8 @@ def get_AIC(values, params): values, params = _to_arrays(values, params) k = len(params) # Num parameters L = get_nll(values) - return 2*k + 2*L + return 2 * k + 2 * L + def get_AICC(values, params): """ @@ -69,11 +71,12 @@ def get_AICC(values, params): York City, USA: Springer. """ - + values, params = _to_arrays(values, params) k = len(params) # Num parameters n = len(values) # Num observations - return get_AIC(values, params) + (2*k * (k + 1)) / (n - k - 1) + return get_AIC(values, params) + (2 * k * (k + 1)) / (n - k - 1) + def get_AIC_weights(aic_values): """ @@ -83,7 +86,7 @@ def get_AIC_weights(aic_values): ---------- aic_values : array-like object Array-like object containing AIC values from different models - + Returns ------- (weights, delta) : tuple @@ -97,7 +100,7 @@ def get_AIC_weights(aic_values): """ aic_values = _to_arrays(aic_values)[0] - minimum = np.min(aic_values) + minimum = np.min(aic_values) delta = aic_values - minimum values = np.exp(-delta / 2) weights = values / np.sum(values) @@ -112,7 +115,7 @@ def get_empirical_cdf(data): Parameters ---------- data : array-like object - Empirical data + Empirical data Returns -------- @@ -128,6 +131,7 @@ def get_empirical_cdf(data): return np.array(ecdf[0]) + class gen_loss_function(object): """ Generic class for loss function between observed and predicted data @@ -147,13 +151,12 @@ def __init__(self, loss_fxn_str): Ex. 'np.abs(obs - pred)' or '(obs - pred)**2' - """ self.loss_fxn = loss_fxn_str def total_loss(self, obs, pred): """ - Total loss for observed and predicted + Total loss for observed and predicted Parameters ---------- @@ -171,6 +174,7 @@ def total_loss(self, obs, pred): get_sum_of_squares = gen_loss_function('(obs - pred)**2').total_loss + def get_r_squared(obs, pred): """ @@ -183,36 +187,41 @@ def get_r_squared(obs, pred): Returns ------- : float - The R**2 value for the regression of + The R**2 value for the regression of observed on predicted """ b0, b1, r, p_value, se = stats.linregress(obs, pred) - return r**2 + return r ** 2 + def get_ks_two_sample(): """ Two sample Kolmogorov Smirnov distribution. Uses the cumulative distribution functions to test whether two samples were drawn from the same continuous distribution. Can be a decent approxmiation for discrete data - (CHECK THIS), but the chi-squared test may be more appropriate. + (CHECK THIS), but the chi-squared test may be more appropriate. """ pass + def get_ks_one_sample(): pass + def get_lrt(): pass + def get_bayes_factor(): pass + def get_chi_squared(dists): """ - Chi-squared test to compare two or more distributions. + Chi-squared test to compare two or more distributions. Parameters ------------------ @@ -238,41 +247,52 @@ def get_chi_squared(dists): Assumption of the Chi-squared test is that the expected value of 80% of the cells is > 5. If this does not hold, the Normal approximation is not valid and you should try an alternative approach. + + If all of the cells in a column contain zero and error will because teh + expected value of the cell is 0. 
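As a hypothetical illustration, two binned abundance vectors of equal length (chosen here so that no column is all zeros) can be compared directly:

    import numpy as np

    bin1 = np.array([4., 2., 2., 1., 1., 1.])
    bin2 = np.array([5., 1., 2., 2., 1., 1.])
    chi2, p, dof, expected = get_chi_squared([bin1, bin2])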
""" assert len(dists) > 1, "Length of dists must be greater than 1" test_len = len(dists[0]) - assert np.all([len(dt) == test_len for dit in dists], "All dists must have" - + " equal length" + assert np.all([len(dt) == test_len for dt in dists]), \ + "All dists must have equal length" chi_table = np.array(dists, dtype=np.float) - chi2, p, dof, expected = stats.chi2_contingency(chi_table, correction=False) + chi2, p, dof, expected = stats.chi2_contingency(chi_table, + correction=False) return chi2, p, dof, expected + def bin_data(data, max_num): """ - Bins the data on base 2. Bins such that the right boundary is exlusive and - the left boundary is inclusive. Does not split density between bins. + Bins the data on base 2. Uses Preston's method of binning which has + exclusive lower boundaries and inclusive upper boundaries. Densities are + not split between bins. Parameters ------------------ data : array-like - Data to be binned + Data to be binned max_num : float The maximum upper most boundary of the data - base : float - The base for log binning - Returns ------------ tuple : (binned_data, bins_edges) - """ log_ub = np.ceil(np.log2(max_num)) - boundaries = 2**np.arange(0, log_ub + 1) + + # Make an exclusive lower bound in keeping with Preston + if log_ub == 0: + boundaries = np.array([0, 1]) + elif log_ub == 1: + boundaries = np.arange(1, 4) + else: + boundaries = 2 ** np.arange(0, log_ub + 1) + boundaries = np.insert(boundaries, 2, 3) + boundaries[3:] = boundaries[3:] + 1 hist_data = np.histogram(data, bins=boundaries) return hist_data @@ -283,4 +303,4 @@ def _to_arrays(*args): Converts all args to np.arrays ''' return tuple([np.array(ta) if np.iterable(ta) else np.array([ta]) for ta in - args]) + args]) diff --git a/macroeco/test_compare.py b/macroeco/test_compare.py index 89bb8a0..7e29c47 100644 --- a/macroeco/test_compare.py +++ b/macroeco/test_compare.py @@ -12,10 +12,9 @@ from macroeco.compare import * import numpy as np import scipy.stats as stats -import copy -import macroeco.distributions as dist import numpy.testing as nt + class TestCompare(TestCase): '''Test Methods in compare.py''' @@ -23,7 +22,7 @@ def test_nll(self): # Test against R result: sum(dnorm(c(1,2,3,4,5), log=TRUE)) R_res = 32.09469 - test_vals = stats.norm.pdf((1,2,3,4,5)) + test_vals = stats.norm.pdf((1, 2, 3, 4, 5)) lglk = get_nll(test_vals) assert_equal(R_res, np.round(lglk, decimals=5)) @@ -32,35 +31,34 @@ def test_empirical_cdf(self): #Test against R's ecdf function # Test Case 1 - test_data = [1,1,1,1,2,3,4,5,6,6] - R_res = [.4,.4,.4,.4,.5,.6,.7,.8,1,1] + test_data = [1, 1, 1, 1, 2, 3, 4, 5, 6, 6] + R_res = [.4, .4, .4, .4, .5, .6, .7, .8, 1, 1] res = get_empirical_cdf(test_data) assert_array_equal(R_res, res) # Test Case 2 - test_data = [3,3,3,3] - R_res = [1,1,1,1] + test_data = [3, 3, 3, 3] + R_res = [1, 1, 1, 1] res = get_empirical_cdf(test_data) assert_array_equal(R_res, res) def test_aic(self): - test_vals = stats.norm.pdf((1,2,3,4,5,6,7,8)) - aic1 = get_AIC(test_vals, (1,1)) + test_vals = stats.norm.pdf((1, 2, 3, 4, 5, 6, 7, 8)) + aic1 = get_AIC(test_vals, (1, 1)) expected = 222.703016531 # Calculated by hand assert_equal(np.round(aic1, decimals=9), expected) - - test_vals = stats.gamma.pdf((1,1,1,4,5,7,12),2) - aic1 = get_AIC(test_vals, (1,1)) + test_vals = stats.gamma.pdf((1, 1, 1, 4, 5, 7, 12), 2) + aic1 = get_AIC(test_vals, (1, 1)) expected = 51.146902 assert_equal(np.round(aic1, decimals=6), expected) def test_aicc(self): # Test values - test_vals = stats.norm.pdf((1,2,3,4,5,6,7,8)) - aic1 = 
get_AICC(test_vals, (1,1)) + test_vals = stats.norm.pdf((1, 2, 3, 4, 5, 6, 7, 8)) + aic1 = get_AICC(test_vals, (1, 1)) # Test that aicc gives the correct values expected = 225.10302 @@ -69,13 +67,13 @@ def test_aicc(self): def test_aic_weights(self): # Test values - vals = [1,1,1,2,3,4,7,23,78] + vals = [1, 1, 1, 2, 3, 4, 7, 23, 78] values = [stats.norm.pdf(vals, scale=100), stats.norm.pdf(vals, scale=99)] aic_vals = [get_AICC(tval, 1) for tval in values] aicw, delta_aic = get_AIC_weights(aic_vals) - pred = np.array([ 0.47909787, 0.52090213]) + pred = np.array([0.47909787, 0.52090213]) assert_array_almost_equal(aicw, pred) def test_gen_loss_function(self): @@ -92,18 +90,82 @@ def test_gen_loss_function(self): assert_equal(pred_loss, test_loss) # Test sum of squares loss function - test_loss = np.sum((obs - pred)**2) + test_loss = np.sum((obs - pred) ** 2) pred_loss = get_sum_of_squares(obs, pred) assert_equal(test_loss, pred_loss) - # Test MSE loss function + # Test MSE loss function loss_fxn = 'np.abs(obs - pred) / len(obs)' loss = gen_loss_function(loss_fxn) test_loss = np.sum(np.abs(obs - pred) / len(obs)) pred_loss = loss.total_loss(obs, pred) assert_equal(test_loss, pred_loss) - + + def test_r_squared(self): + + # Already unittested in scipy. Checking for functionaliity + test_data = np.random.randint(5, 100, 100) + rsq = get_r_squared(test_data, test_data) + assert_equal(rsq, 1) + + def test_chi_squared(self): + + # Compare two distributions + # Chi squared function itself is already unittested in scipy + + bin_max = 16 + p = 0.99 + dist1 = stats.logser(p=p).rvs(100) + dist2 = stats.logser(p=p).rvs(100) + + bin1 = bin_data(dist1, np.max(bin_max))[0] + bin2 = bin_data(dist2, np.max(bin_max))[0] + + res = get_chi_squared([bin1, bin2]) + + # Check three distributions + dist3 = stats.logser(p=p).rvs(100) + bin3 = bin_data(dist3, np.max(bin_max))[0] + + res = get_chi_squared([bin1, bin2, bin3]) + + # Check error is thrown with only one dist + assert_raises(AssertionError, get_chi_squared, [bin1]) + + # Check error is thrown if bins are different lengths + assert_raises(AssertionError, get_chi_squared, [bin1, bin2[:-1]]) + + def test_bin_data(self): + + # Test against R's vegan prestonfit: prestonfit(data, tiesplit=FALSE) + # Note that vegan drops the bins with 0 values + + data = np.array([1, 1, 1, 1, 2, 2, 4, 4, 8, 16, 17.1, 89]) + vegan = np.array([4, 2, 2, 1, 1, 1, 0, 1], dtype=np.float) + test_res = bin_data(data, max(data))[0] + assert_array_equal(test_res, vegan) + + data = np.array([1, 1, 1, 1, 4, 5, 6, 7, 12, 34, 56]) + vegan = np.array([4, 0, 1, 3, 1, 0, 2], dtype=np.float) + test_res = bin_data(data, max(data))[0] + assert_array_equal(test_res, vegan) + + # Test boundary condition + data = np.array([1, 2]) + vegan = np.array([1, 1], dtype=np.float) + test_res = bin_data(data, max(data))[0] + assert_array_equal(test_res, vegan) + + data = np.array([1, 1, 1]) + vegan = np.array([3], dtype=np.float) + test_res = bin_data(data, max(data))[0] + assert_array_equal(test_res, vegan) + + data = np.array([1, 2, 3]) + vegan = np.array([1, 1, 1], dtype=np.float) + test_res = bin_data(data, max(data))[0] + assert_array_equal(test_res, vegan) # From b2697504277bc356b21a67d3504d525494130031 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 18 Mar 2014 19:54:55 -0700 Subject: [PATCH 059/343] Added new compare funtions to sphinx docs --- macroeco/compare.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/macroeco/compare.py b/macroeco/compare.py index 
c5db2a5..98d27dc 100644 --- a/macroeco/compare.py +++ b/macroeco/compare.py @@ -19,7 +19,9 @@ get_nll get_empirical_cdf get_sum_of_squares - + get_r_squared + get_chi_squared + bin_data """ @@ -80,15 +82,15 @@ def get_AICC(values, params): def get_AIC_weights(aic_values): """ - Calculates the aic_weights for a given set of models + Calculates the aic_weights for a given set of models. Parameters - ---------- + ----------------- aic_values : array-like object Array-like object containing AIC values from different models Returns - ------- + ------------- (weights, delta) : tuple First element contains the relative AIC weights, second element contains the delta AIC values. @@ -280,7 +282,15 @@ def bin_data(data, max_num): Returns ------------ - tuple : (binned_data, bins_edges) + tuple : (binned_data, bin_edges) + + References + ----------------- + + .. [#] + Preston, F. (1962). The canonical distribution of commonness and rarity. + Ecology, 43, 185-215 + """ log_ub = np.ceil(np.log2(max_num)) From d7d3ed040745504ad175c7da4cd5f8b31182f859 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 19:01:26 -0700 Subject: [PATCH 060/343] Add local RTD theme --- doc/conf.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/doc/conf.py b/doc/conf.py index 39bd59a..938b322 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -97,6 +97,15 @@ # -- Options for HTML output --------------------------------------------------- +# on_rtd is whether we are on readthedocs.org +import os +on_rtd = os.environ.get('READTHEDOCS', None) == 'True' + +if not on_rtd: # only import and set the theme if we're building docs locally + import sphinx_rtd_theme + html_theme = 'sphinx_rtd_theme' + html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] + # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. #html_theme = 'scipy' From ca6c86bfcb6fef9dd4a48586570fc36f79cde7b1 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 19:06:28 -0700 Subject: [PATCH 061/343] Update distributions2 docstrings --- macroeco/distributions2.py | 192 +++++++++++++++++++++---------------- 1 file changed, 108 insertions(+), 84 deletions(-) diff --git a/macroeco/distributions2.py b/macroeco/distributions2.py index c7670d2..84d617a 100644 --- a/macroeco/distributions2.py +++ b/macroeco/distributions2.py @@ -42,9 +42,8 @@ import numpy as np import numpy.random as nprand -#from scipy.misc.doccer import inherit_docstring_from from scipy.stats.distributions import (rv_discrete, rv_continuous, docdict, - docdict_discrete, docheaders) + docdict_discrete) import scipy.stats.distributions as spdist import scipy.optimize as optim import scipy.special as special @@ -58,40 +57,7 @@ def inherit_docstring_from(cls): If the decorated method has no docstring, it is simply given the docstring of `cls`s method. - Parameters - ---------- - cls : Python class or instance - A class with a method with the same name as the decorated method. - The docstring of the method in this class replaces '%(super)s' in the - docstring of the decorated method. - - Returns - ------- - f : function - The decorator function that modifies the __doc__ attribute - of its argument. - - Examples - -------- - In the following, the docstring for Bar.func created using the - docstring of `Foo.func`. - - >>> class Foo(object): - ... def func(self): - ... '''Do something useful.''' - ... return - ... - >>> class Bar(Foo): - ... @inherit_docstring_from(Foo) - ... def func(self): - ... '''%(super)s - ... Do it fast. 
- ... ''' - ... return - ... - >>> b = Bar() - >>> b.func.__doc__ - 'Do something useful.\n Do it fast.\n ' + From scipy.misc.doccer """ def _doc(func): @@ -106,41 +72,42 @@ def _doc(func): return _doc -_doc_param_note = \ -"""There are many available methods of `%(name)s`, each of which require one or -more of the parameters listed below. +_doc_default_callparams = \ +""" +Parameters +---------- +x : array_like + quantiles +q : array_like + lower or upper tail probability +%(shapes)s : array_like + shape parameters +loc : array_like, optional + location parameter (default=0) +scale : array_like, optional + scale parameter (default=1) +size : int or tuple of ints, optional + shape of random variates (default computed from input arguments ) +moments : str, optional + composed of letters ['mvsk'] specifying which moments to compute where + 'm' = mean, 'v' = variance, 's' = (Fisher's) skew and + 'k' = (Fisher's) kurtosis. (default='mv') """ -_doc_custom_methods = \ -"""fit2(data, %(shapes)s) - MLE estimates of shapes given initial guesses (use instead of `fit`).""" - -_doc_discrete_custom_methods = \ -"""translate_args(uargs) - Get shape parameters from user-friendly args. -fit2(data, %(shapes)s) - MLE estimates of shapes given initial guesses.""" # Remove header from all methods -_docdict_allmeth_sh = docdict['allmethods'][16:] -_docdict_discrete_allmeth_sh = docdict_discrete['allmethods'][17:] +_docdict_allmeth = docdict['allmethods'][16:] +_docdict_discrete_allmeth = docdict_discrete['allmethods'][17:] # **kwds in expect string followed by no space was throwing warning -_docdict_allmeth_sh = _docdict_allmeth_sh.replace(', **kwds','') +_docdict_allmeth = _docdict_allmeth.replace(', **kwds','') -docdict['before_notes'] = ''.join([_doc_param_note, - docheaders['methods'], - _doc_custom_methods, - _docdict_allmeth_sh, +# Create docstring helpers +docdict['before_notes'] = ''.join([_docdict_allmeth, docdict['callparams']]) -docdict_discrete['before_notes'] = ''.join([_doc_param_note, - docheaders['methods'], - _doc_discrete_custom_methods, - _docdict_discrete_allmeth_sh, - docdict_discrete['callparams']]) - - +docdict_discrete['before_notes'] = ''.join([_docdict_discrete_allmeth, + docdict['callparams']]) class rv_continuous_meco(rv_continuous): """ @@ -223,7 +190,7 @@ class rv_discrete_meco(rv_discrete): ------- translate_args takes user-friendly params as input and returns shape params - fit + fit2 estimates distribution params from data """ @@ -291,11 +258,23 @@ class geom_gen(rv_discrete_meco): .. math:: \mathrm{pmf(x)} = (1-p)^{x} p - for ``x >= 0``. The location parameter ``loc`` is not used. + for ``x >= 0``. The ``loc`` parameter is not used. + + There are many available methods of ``geom``, each of which require one or + more of the parameters listed below. + + Methods + ------- + translate_args(mu) + Get shape parameter p from distribution mean + fit2(data) + ML estimate of p from data %(before_notes)s - uargs : float + mu : float distribution mean + data : array_like + values used to fit distribution """ @@ -342,12 +321,25 @@ class geom_uptrunc_gen(rv_discrete_meco): for ``x >= 0``. `geom_uptrunc` takes two shape parameters: ``p`` and ``b``, the upper - limit. The location parameter ``loc`` is not used. + limit. The ``loc`` parameter is not used. - %(before_notes)s + There are many available methods of `geom_uptrunc`, each of which require + one or more of the parameters listed below. 
- uargs : float - distribution mean, upper limit + Methods + ------- + translate_args(mu, b) + Get shape parameter p from distribution mean and upper limit + fit2(data, b=sum(data)) + ML estimate of p from data and upper limit (returns p, b) + + %(before_notes)s + mu : float + distribution mean + b : float + distribution upper limit, defaults to sum of data + data : array_like + values used to fit distribution Notes ----- @@ -357,7 +349,8 @@ class geom_uptrunc_gen(rv_discrete_meco): limit. This distribution is known as the Pi distribution in the MaxEnt Theory of - Ecology [#]_, where the ``p`` parameter is known as ``exp(-lambda)``. + Ecology [#]_, where the ``p`` parameter is known as ``exp(-lambda)``. The + special case of a uniform pmf has been described as HEAP [#]_. References ---------- @@ -365,11 +358,10 @@ class geom_uptrunc_gen(rv_discrete_meco): Harte, J. (2011). Maximum Entropy and Ecology: A Theory of Abundance, Distribution, and Energetics (p. 264). Oxford, United Kingdom: Oxford University Press. - - .. - DEV: There is a difficult implicit equation needed to determine the p - parameter from the mu and b arguments. We've employed the brentq solver - here but note that it fails regularly for certain shape combinations. + .. [#] + Harte, J., Conlisk, E., Ostling, A., Green, J. L., & Smith, A. B. + (2005). A theory of spatial structure in ecological communities at + multiple spatial scales. Ecological Monographs, 75(2), 179-197. """ @@ -441,12 +433,23 @@ class nbinom_gen(spdist.nbinom_gen): \left(\frac{\mu}{k+\mu}\right)^x for ``x >= 0``. In the traditional parameterization, ``n = k`` (the size - parameter) and ``p = k / (k + mu)``. The location parameter ``loc`` is not - used. + parameter) and ``p = k / (k + mu)``. The ``loc`` parameter is not used. + + Methods + ------- + translate_args(mu) + Get shape parameter p from distribution mean + fit2(data, k_range=(0.1,100,0.1)) + ML estimate of mu and k from data, with k evaluated at (min, max, step) + values given by k_range %(before_notes)s - uargs : float - distribution mean and k parameter + mu : float + distribution mean + k : float + clustering parameter + data : array_like + values used to fit distribution """ @@ -547,11 +550,21 @@ class expon_gen(rv_continuous_meco): \mathrm{pdf(x)} = \lambda e^{-\lambda x} - for ``x >= 0``. + for ``x >= 0``. The ``loc`` and ``scale`` parameters are not used. + + + Methods + ------- + translate_args(mu) + Get shape parameter lam from distribution mean + fit2(data) + ML estimate of lam from data %(before_notes)s - uargs : float + mu : float distribution mean + data : array_like + values used to fit distribution """ @@ -590,11 +603,22 @@ class expon_uptrunc_gen(rv_continuous_meco): \mathrm{pdf(x)} = \frac{\lambda e^{-\lambda x}}{1 - e^{-\lambda x}} - for ``b >= x >= 0``. + for ``b >= x >= 0``. The ``loc`` and ``scale`` parameters are not used. 
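A minimal fitting sketch, assuming the module-level expon_uptrunc instance is in scope and using arbitrary data with an explicitly supplied upper limit:

    import numpy as np

    data = np.array([1., 2., 2., 4., 8.])
    lam, b = expon_uptrunc.fit2(data, b=20.)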
+ + Methods + ------- + translate_args(mu, b) + Get shape parameter lam from distribution mean and upper limit + fit2(data, b=sum(data)) + ML estimate of lam from data (returns lam, b) %(before_notes)s - uargs : float - distribution mean and upper limit + mu : float + distribution mean + b : float + distribution upper limit, defaults to sum of data + data : array_like + values used to fit distribution """ From 6a11b07b0037de2fdf08002d59b2aca12db302e8 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 19:07:05 -0700 Subject: [PATCH 062/343] Make b for uptrunc default to sum of data --- macroeco/distributions2.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/macroeco/distributions2.py b/macroeco/distributions2.py index 84d617a..cf700ea 100644 --- a/macroeco/distributions2.py +++ b/macroeco/distributions2.py @@ -380,6 +380,8 @@ def fit2(self, data, b=None): """ # Take mean of data as MLE of distribution mean, then calculate p mu = np.mean(data) + if not b: + b = np.sum(data) return _geom_solve_p_from_mu_vect(mu, b), b def _argcheck(self, p, b): @@ -632,7 +634,9 @@ def translate_args(self, mu, b): raise NotImplementedError, "Translation of mu to lam not implemented" @inherit_docstring_from(rv_continuous_meco) - def fit2(self, data, b=np.inf): + def fit2(self, data, b=None): + if not b: + b = np.sum(data) expon = expon_gen(a=0.0, b=b) return expon.fit(data, floc=0)[2], b From 21d730a0c3f194a328118c19fedf08f9ba8749f2 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 19:07:38 -0700 Subject: [PATCH 063/343] geom_uptrunc cdf should work when len(x) is 1 --- macroeco/distributions2.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/macroeco/distributions2.py b/macroeco/distributions2.py index cf700ea..22d4dea 100644 --- a/macroeco/distributions2.py +++ b/macroeco/distributions2.py @@ -396,7 +396,10 @@ def _pmf(self, x, p, b): def _cdf(self, x, p, b): x = np.floor(x) cdf = (1.0-(1.0-p)**(x+1)) / (1.0-(1.0-p)**(b+1)) - cdf[x > b] = 1 + try: + cdf[x > b] = 1 # Only valid if len(x)>1 + except: + pass return cdf def _stats(self, p, b): From 6d548d36eca18a82fede7f5cb40204ac5ec24055 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 19:08:05 -0700 Subject: [PATCH 064/343] Change name of fit2 first parameter from x to data --- macroeco/distributions2.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/macroeco/distributions2.py b/macroeco/distributions2.py index 22d4dea..7bdaeb9 100644 --- a/macroeco/distributions2.py +++ b/macroeco/distributions2.py @@ -463,7 +463,7 @@ def translate_args(self, mu, k): return mu, k @inherit_docstring_from(rv_discrete_meco) - def fit2(self, x, k_range=(0.1,100,0.1)): + def fit2(self, data, k_range=(0.1,100,0.1)): """%(super)s Requires one argument containing data to fit. A keyword argument k_range contains a tuple of the start, stop, and step values to search @@ -474,9 +474,9 @@ def fit2(self, x, k_range=(0.1,100,0.1)): parameter k. 
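For instance, a sketch of this call on made-up data (more than 20 observations are required), assuming the module-level nbinom instance defined below:

    import numpy as np

    data = np.repeat([1, 2, 3, 5, 8, 13, 30], 4)   # 28 observations
    mu_hat, k_hat = nbinom.fit2(data, k_range=(0.1, 10, 0.1))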
""" - assert len(x) > 20, "nbinom fit is not stable with <20 data points" - mu = np.mean(x) - return mu, _nbinom_solve_k_from_mu(x, mu, k_range) + assert len(data) > 20, "nbinom fit is not stable with <20 data points" + mu = np.mean(data) + return mu, _nbinom_solve_k_from_mu(data, mu, k_range) def _get_p_from_mu(self, mu, k): return k / (k + mu) @@ -522,7 +522,7 @@ def _stats(self, mu, k): nbinom = nbinom_gen(name='nbinom', shapes='mu, k') -def _nbinom_solve_k_from_mu(x, mu, k_range): +def _nbinom_solve_k_from_mu(data, mu, k_range): """ For the nbinom, given mu, return k from searching some k_range. """ @@ -530,14 +530,14 @@ def _nbinom_solve_k_from_mu(x, mu, k_range): # TODO: See if a root finder like fminbound would work with Decimal used in # logpmf method (will this work with arrays?) - def nll(x, mu, k): - return -np.sum(nbinom._logpmf(x, mu, k)) + def nll(data, mu, k): + return -np.sum(nbinom._logpmf(data, mu, k)) k_array = np.arange(*k_range) nll_array = np.zeros(len(k_array)) for i in range(len(k_array)): - nll_array[i] = nll(x, mu, k_array[i]) + nll_array[i] = nll(data, mu, k_array[i]) min_nll_idx = np.argmin(nll_array) From 56bb4986a6fdcb31c61b9e2cedba945545fd83f9 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 19:08:44 -0700 Subject: [PATCH 065/343] Note TODO for logging param file read failure --- macroeco/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/macroeco/main.py b/macroeco/main.py index b330c51..0d626ac 100644 --- a/macroeco/main.py +++ b/macroeco/main.py @@ -51,6 +51,7 @@ def main(param_path='parameters.txt'): """ # Confirm file is present and extract dir name + # TODO: Because of log catch in twiggy_setup, this doesn't print anything if not os.path.isfile(param_path): raise IOError, "Parameter file not found at %s" % param_path param_dir = os.path.dirname(param_path) From 03c2d2df978a89ee2465e2c79a6218969b3fc8d5 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 19:09:26 -0700 Subject: [PATCH 066/343] Use mpltools to improve plot appearance --- macroeco/main.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/macroeco/main.py b/macroeco/main.py index 0d626ac..ea75ea7 100644 --- a/macroeco/main.py +++ b/macroeco/main.py @@ -23,9 +23,11 @@ from pandas import DataFrame import matplotlib.pyplot as plt from matplotlib.mlab import rec2csv, rec_append_fields +from mpltools import style +style.use('ggplot') from twiggy_setup import get_log -from empirical import Patch +from empirical import * from distributions2 import * from compare import * @@ -497,11 +499,30 @@ def _save_table_and_plot(cidx, models, options, mod_results, name, x, emp, width = x[1] - x[0] ax = df_plt.plot() exec plot_exec_str + ax = _pad_plot_frame(ax) fig = ax.get_figure() fig.savefig(p_path) plt.close('all') +def _pad_plot_frame(ax, pad=0.01): + """ + Provides padding on sides of frame equal to pad fraction of plot + """ + + ax.set_xlim(left=0) + ax.set_ylim(bottom=0) + + xmin, xmax = ax.get_xlim() + ymin, ymax = ax.get_ylim() + xrange = xmax - xmin + yrange = ymax - ymin + + ax.set_xlim(xmin - xrange*pad, xmax + xrange*pad) + ax.set_ylim(ymin - yrange*pad, ymax + yrange*pad) + + return ax + def _data_pred_curve(cidx, models, options, emp_results, mod_results): raise NotImplementedError, "Data and curve comparison not implemented" From fa7871863384743587d69d442c7b971635676460 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 19:10:24 -0700 Subject: [PATCH 067/343] Error catching and other fixes --- 
macroeco/main.py | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/macroeco/main.py b/macroeco/main.py index ea75ea7..59349dc 100644 --- a/macroeco/main.py +++ b/macroeco/main.py @@ -146,44 +146,53 @@ def _analyze_empirical(options): # TODO: (In empirical) Create result objects rather than strange lists of # nested tuples. - # If no data path is given or data path invalid, raise error - try: - data_path = os.path.normpath(os.path.join(options['param_dir'], - options['data'])) - except Exception: - raise IOError, "Path to data file is invalid." + # If no metadata path is given or data path invalid, raise error + metadata_path = os.path.normpath(os.path.join(options['param_dir'], + options['metadata'])) + if not os.path.isfile(metadata_path): + raise IOError, "Path to metadata file %s is invalid." % metadata_path # Create Patch object for this data - patch = Patch(data_path) + patch = Patch(metadata_path) # Get cols and splits variable (req by all metrics) and add to options options['cols'], options['splits'] = _get_cols_splits(options) # Get names of args and kwargs to method specified by metric option exec ("arg_and_kwd_names, _, _, kw_defaults = " - "inspect.getargspec(Patch.%s)" % options['metric']) - arg_names = arg_and_kwd_names[1:-len(kw_defaults)] # Ignore first arg self + "inspect.getargspec(%s)" % options['metric']) + if kw_defaults: + arg_names = arg_and_kwd_names[1:-len(kw_defaults)] # Ignore patch kw_names = arg_and_kwd_names[-len(kw_defaults):] + else: + arg_names = arg_and_kwd_names[1:] + kw_names = [] # Create list with vals for all args and dict with vals for all kwargs # All required args must be in options - args = [] + args = [] # Patch is always first argument for arg_name in arg_names: try: exec 'args.append(eval("%s"))' % options[arg_name] + except SyntaxError: # eval failing because option is a string + exec 'args.append("%s")' % options[arg_name] except: raise ValueError, ("Value for required argument %s not provided" % arg_name) kwargs = {} for kw_name in kw_names: + if kw_name in options.keys(): try: - exec "kwargs[kw_name]=eval(%s)" % options[kw_name] - except Exception: - pass + exec 'kwargs[kw_name]=eval("%s")' % options[kw_name] + except SyntaxError: # eval failing because option is a string + exec 'kwargs[kw_name]="%s"' % options[kw_name] + except: + raise ValueError, ("Value for optional argument %s is invalid" + % kw_name) # Call Patch method with appropriate args and return result - return eval("patch.%s(*args, **kwargs)" % options['metric']) + return eval("%s(patch, *args, **kwargs)" % options['metric']) def _get_cols_splits(options): @@ -199,7 +208,7 @@ def _get_cols_splits(options): if 'splits' in options.keys(): splits = options['splits'] else: - splits = {} + splits = None # Cols may be given as option or individual col options may be options if 'cols' in options.keys(): From e038fed632430adc43b2e98d46cf5697f7f62840 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 19:10:51 -0700 Subject: [PATCH 068/343] Make main executable from command line --- macroeco/main.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/macroeco/main.py b/macroeco/main.py index 59349dc..09056f4 100644 --- a/macroeco/main.py +++ b/macroeco/main.py @@ -559,3 +559,6 @@ def _get_file_path(cidx, options, file_name): return os.path.join(options['run_dir'], '%i_%s' % (cidx+1, file_name)) + +if __name__ == '__main__': + main(sys.argv[1]) From c5170068d58d1ae039991db05db70501aa506322 Mon Sep 17 00:00:00 2001 
From: Justin Kitzes Date: Thu, 20 Mar 2014 19:11:16 -0700 Subject: [PATCH 069/343] Models string in parameters should be semicolon separated --- macroeco/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macroeco/main.py b/macroeco/main.py index 09056f4..cabe4bf 100644 --- a/macroeco/main.py +++ b/macroeco/main.py @@ -274,7 +274,7 @@ def _analyze_models_from_data(options, emp_results): """ # Get list of model names - models = options['models'].replace(' ', '').split(',') + models = options['models'].replace(' ', '').split(';') # Fit theories to all emp_results # TODO: Make work for 2D results, i.e., curves, comm_sep, o_ring From 44e1b2b55365086fe51881b3193078f34b0e21e8 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 19:11:57 -0700 Subject: [PATCH 070/343] Extract pandas columns as values --- macroeco/main.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/macroeco/main.py b/macroeco/main.py index cabe4bf..bf89f77 100644 --- a/macroeco/main.py +++ b/macroeco/main.py @@ -283,7 +283,7 @@ def _analyze_models_from_data(options, emp_results): for emp_result in emp_results: output_emp_result = {} for model in models: - data = emp_result[1] + data = emp_result[1]['y'].values fits = _get_fits(data, model) values = _get_values(data, model, fits) stat_names, stats = _get_comparison_statistic(values, fits) @@ -440,7 +440,7 @@ def _data_pred_dist(cidx, models, options, emp_results, mod_results): Also make plots for all three """ - emp_result = emp_results[cidx][1] + emp_result = emp_results[cidx][1]['y'].values n_vals = len(emp_result) # CDF @@ -486,12 +486,13 @@ def calc_func(model, x, shapes): def _save_table_and_plot(cidx, models, options, mod_results, name, x, emp, - calc_func, plot_exec_str, x_plot_mult=1): + calc_func, plot_exec_str): f_path = _get_file_path(cidx, options, '%s.csv' % name) p_path = _get_file_path(cidx, options, '%s.png' % name) - df = DataFrame({'x': x * x_plot_mult}) + + df = DataFrame({'x': x}) df['empirical'] = emp for model in models: mod_result = mod_results[cidx][model] From c31f426d73c14b148bccf84d633769ba993d90fb Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 19:12:15 -0700 Subject: [PATCH 071/343] Change terminology combinations to splits --- macroeco/main.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/macroeco/main.py b/macroeco/main.py index bf89f77..47c9dc9 100644 --- a/macroeco/main.py +++ b/macroeco/main.py @@ -354,19 +354,19 @@ def _save_results(options, emp_results, mod_results): os.makedirs(options['run_dir']) # Write outputs depending on pres/abs of emp and mod and dist/curve metric - _write_combination_index_file(options, emp_results) + _write_split_index_file(options, emp_results) _write_output(options, emp_results, mod_results) -def _write_combination_index_file(options, emp_results): +def _write_split_index_file(options, emp_results): """ - Write index of combinations table, giving number and combination + Write table giving index of splits, giving number and combination """ if not emp_results: return None - f_path = os.path.join(options['run_dir'], '_combination_index.csv') + f_path = os.path.join(options['run_dir'], '_split_index.csv') with open(f_path, 'a') as f: for i,emp_result in enumerate(emp_results): f.write("%i,%s\n" % (i+1, str(emp_result[0]))) @@ -382,18 +382,18 @@ def _write_output(options, emp_results, mod_results): # Get combinations from either emp or mod - if both exist must be same try: - n_combs = 
len(emp_results) + n_splits = len(emp_results) except: - n_combs = len(mod_results) + n_splits = len(mod_results) # Get list of names of models try: - models = options['models'].replace(' ','').split(",") + models = options['models'].replace(' ','').split(';') except: models = None # Loop through all combinations - for cidx in range(n_combs): + for cidx in range(n_splits): if mod_results: _write_fitted_params(cidx, models, options, mod_results) if emp_results: From 5a475a2bce74722b4453189bdce0c41d8cda9087 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 19:13:11 -0700 Subject: [PATCH 072/343] Improve RAD plot --- macroeco/main.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/macroeco/main.py b/macroeco/main.py index 47c9dc9..5e1c2be 100644 --- a/macroeco/main.py +++ b/macroeco/main.py @@ -51,13 +51,13 @@ def main(param_path='parameters.txt'): Path to directory containing user-generated parameter file """ - + # Confirm file is present and extract dir name # TODO: Because of log catch in twiggy_setup, this doesn't print anything if not os.path.isfile(param_path): raise IOError, "Parameter file not found at %s" % param_path param_dir = os.path.dirname(param_path) - + # Get logger and announce start log = get_log(param_dir, clear=True) log.info('Starting analysis') @@ -163,7 +163,7 @@ def _analyze_empirical(options): "inspect.getargspec(%s)" % options['metric']) if kw_defaults: arg_names = arg_and_kwd_names[1:-len(kw_defaults)] # Ignore patch - kw_names = arg_and_kwd_names[-len(kw_defaults):] + kw_names = arg_and_kwd_names[-len(kw_defaults):] else: arg_names = arg_and_kwd_names[1:] kw_names = [] @@ -183,7 +183,7 @@ def _analyze_empirical(options): kwargs = {} for kw_name in kw_names: if kw_name in options.keys(): - try: + try: exec 'kwargs[kw_name]=eval("%s")' % options[kw_name] except SyntaxError: # eval failing because option is a string exec 'kwargs[kw_name]="%s"' % options[kw_name] @@ -456,17 +456,16 @@ def calc_func(model, x, shapes): x, emp_cdf, calc_func, plot_exec_str) # RAD - x = np.arange(n_vals)/float(n_vals) + 0.5/float(n_vals) + x = np.arange(n_vals) + 1 emp_rad = np.sort(emp_result)[::-1] def calc_func(model, x, shapes): - return eval("%s.ppf(x, *shapes)" % model)[::-1] + return eval("%s.ppf((x-0.5)/len(x), *shapes)" % model)[::-1] - plot_exec_str = "ax.step(x * x_plot_mult, emp, color='k')" + plot_exec_str = "ax.scatter(x, emp, color='k')" _save_table_and_plot(cidx, models, options, mod_results, 'data_pred_rad', - x, emp_rad, calc_func, plot_exec_str, - x_plot_mult=n_vals) + x, emp_rad, calc_func, plot_exec_str) # PDF/PMF hist_bins = 11 From d947a1f7807de73dc8c9e39d1c9ccd7e086b5d97 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 19:13:40 -0700 Subject: [PATCH 073/343] Add test for geom_uptrunc cdf with x of length 1 --- macroeco/test_distributions2.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/macroeco/test_distributions2.py b/macroeco/test_distributions2.py index 9b3accb..29d93d9 100644 --- a/macroeco/test_distributions2.py +++ b/macroeco/test_distributions2.py @@ -52,6 +52,11 @@ def test_cdf(self): # Expected values are regular geom cdf divided by cdf at b vals = geom_uptrunc.cdf([0,1,2], 0.5, 2) assert_array_almost_equal(vals, np.array([0.5,0.75,0.875])/0.875) + + def test_cdf_x_len_1(self): + # cdf should be not throw error even if x is len 1 + vals = geom_uptrunc.cdf(0, 0.5, 2) + assert_almost_equal(vals, 0.5/0.875) def test_mean(self): mu1 = geom_uptrunc.mean(0.801, 32) From 
92d7942434d0703ce528850c2c4d256b5313e18e Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 19:16:09 -0700 Subject: [PATCH 074/343] Majore rewrite of Patch, sad, and ssad Patch now works similarly to the old Data class, reading in data and storing it. Analysis is now done by independent functions, not methods, with sad and ssad now working properly. Docstring construction is now partially automated and all docstrings are updated. --- macroeco/empirical.py | 1853 +++++++++++++++++++++++------------------ 1 file changed, 1030 insertions(+), 823 deletions(-) diff --git a/macroeco/empirical.py b/macroeco/empirical.py index bc76569..8a59ab6 100644 --- a/macroeco/empirical.py +++ b/macroeco/empirical.py @@ -3,973 +3,1083 @@ Empirical (:mod:`macroeco.empirical`) ============================================== -This module contains distributions commonly used in analysis of ecological -patterns. At present, all distributions here are univariate. +This module contains functions used in the empirical analysis of +macroecological patterns. + +Patch +===== + +Patch is a class. + +.. autosummary:: + :toctree: generated/ + + Patch + +Metrics +======= .. autosummary:: :toctree: generated/ - Patch.__init__ - Patch.ased - Patch.sad - z + sad + ssad """ from __future__ import division +import os import numpy as np +import pandas as pd + +from configparser import ConfigParser + from math import radians, cos, sin, asin, sqrt import itertools from copy import deepcopy -from data import DataTable import scipy.spatial.distance as dist #import shapely.geometry as geo -class Patch: - ''' - An object representing an empirical census. +def doc_sub(*sub): + def dec(obj): + obj.__doc__ = obj.__doc__.format(*sub) + return obj + return dec + +metric_params = \ + """patch : Patch obj + Patch object containing data for analysis + cols : dict + Indicates which column names in patch data table are associated with + species identifiers, counts, energy, and mass. See Notes. + splits : str + If multiple analyses for subsets of patch data table are desired, + specifies how columns should be split. See Notes.""" + +metric_return = \ + """list + List of tuples containing results, where the first element of each + tuple is a string indicating the split values used for that result and + second element is a dataframe giving the result.""" + +cols_note = \ + """The parameter `cols` is a dictionary with keys for four special + columns and values giving the column name in the patch data table + associated with each special column. + + - spp_col - Unique species identifiers + - count_col - Number of individuals at a location + - energy_col - Energy of individuals + - mass_cal - Mass of individuals + + Only spp_col is always mandatory. Note that the value of spp_col may be + set to a columm in the data table giving the genus, family, functional + group, etc., which allows for analysis of this metric by those groups. + count_col is used when multiple individuals of a species may be found at + a single recorded location, as is the case in gridded censuses where all + individuals in a quadrat are "assigned" to a single point. energy_col + and mass_col are used for energy-based metrics. 
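For example, for a gridded census whose table has (hypothetical) columns named 'spp' and 'count', the cols argument would be:

    cols = {'spp_col': 'spp', 'count_col': 'count'}
    # add 'energy_col' and/or 'mass_col' entries for energy-based metrics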
+ """ + +splits_note = \ + """The parameter `splits` is a semicolon-separated string in the form of + "column: value", where column is a name of a column in the patch data + table and value is either (a) an integer giving the number of + equally-spaced divisions of a column, or (b) the special keyword + 'split', which evaluates all unique levels of a column. + + For example, presume a data table has columns for x and y spatial + coordinates and a column for year, of which there are three. The string + "x:2; y:2; year:split" will perform the analysis separately for each of + four subplots of the patch (created by dividing the x and y coordinates + each into two equally sized divisions) within each of the three years, + for a total of 12 separate analyses.""" + + +class Patch(object): + """ + An object representing an empirical census Parameters ---------- - data_path : str - Path to csv file containing census data. - subset : dict or str - Dictionary of permanent subset to data, {'column_name': 'condition'}, - which will limit all analysis to records in which column_name meets the - condition, ie, {'year': ('==', 2005), 'x': [('>', 20), ('<', 40)]} - restricts analysis to year 2005 and x values between 20 and 40. These - conditions can also be passed to the individual methods, but subsetting - the data table up front may save analysis time. Subsetting on a string - would look something like {'name' : [('==', 'John'), ('==', 'Harry')]}. - In addition, subset can be a query string for a SQL database. + metadata_path : str + Path to metadata file describing census data + subset : str + String describing subset of data to use for Patch analysis. See Notes. Attributes ---------- - data_table : object of class DataTable - Object containing patch data and metadata. + table : dataframe + Table of census data recorded in patch + meta : ConfigParser obj + Object similar to dict describing data table, loaded from metadata file + at metadata_path + subset : str + Subset string passed as parameter + + Notes + ----- + The table file described by the metadata must contain column names + consisting only of letters and numbers, with no spaces or other special + characters. + + The parameter subset takes different forms depending on whether the data + file described by the metadata is a csv or a sql/db file. + + For csv data files, subset is a semicolon-separated string describing + subset operations. For example, the string "year==2005; x>20; x<40; + spp=='cabr'" loads a data table containing only records for which the year + is 2005, x values are between 20 and 40, and species 'cabr'. Note that for + categorical columns, the value of the column must be enclosed in single + quotes. + + For sql/db files, subset is a SQL query string that selects the data from + the data file. + + """ + + def __init__(self, metadata_path, subset=''): + + self.meta = ConfigParser() + self.meta.read(metadata_path) + self.subset = subset + self.table = self._load_table(metadata_path, + self.meta['Description']['datapath'], + subset) + + + def _load_table(self, metadata_path, relative_data_path, subset): + """ + Load data table, taking subset if needed - ''' + Parameters + ---------- + metadata_path : str + Path to metadata file + relative_data_path : str + Path to data file from location of metadata file + subset : str + String describing subset of data to use for analysis - def __init__(self, datapath, subset = {}): - '''Initialize object of class Patch. 
See class documentation.''' + Returns + ------- + dataframe + Table for analysis - # Handle csv - self.data_table = DataTable(datapath, subset=subset) + """ - # If datapath is sql or db the subsetting is already done. - if type(subset) == type({}): - self.data_table.table = self.data_table.get_subtable(subset) + metadata_dir = os.path.dirname(metadata_path) + data_path = os.path.normpath(os.path.join(metadata_dir, + relative_data_path)) + type = data_path.split('.')[-1] + if type == 'csv': + full_table = pd.read_csv(data_path) + table = _subset_table(full_table, subset) + elif type in ['db', 'sql']: + table = self._get_db_table(data_path, type, subset) + else: + raise TypeError('Cannot process file of type %s' % type) - def parse_criteria(self, criteria): - ''' - Parses criteria list to get all possible column combinations. + return table + def _get_db_table(self, data_path, type): + """ + Query a database and return query result as a recarray + Parameters ---------- - criteria : dict - (See docstring for Patch.sad) - energy : bool - If False, does not return an energy column, if True, returns an - energy column. - + data_path : str + Path to the database file + type : str + Type of database, either sql or db + Returns ------- - spp_list : ndarray - 1D array listing identifiers for species in the same order as they - appear in arrays found in result. Contains all species in table, - although all species may not appear in subtables that are defined - by combinations. - spp_col : str - Name of column containing species identifiers. - count_col : str - Name of column containing counts, if any. - combinations : list of dicts - List of dictionaries giving all possible combinations of criteria. - Columns not mentioned in criteria are ignored and will be averaged - over in later analyses. - - ''' - - spp_list = None - spp_col = None - count_col = None - engy_col = None - mass_col = None - combinations = [] - - # Calculate all possible combinations of columns based on criteria - # TODO: Add error checking - for key, value in criteria.items(): - - # Look for two special values indicating species and count cols - if value == 'species': - spp_list = np.unique(self.data_table.table[key]) - spp_col = key - continue - if value == 'count': - count_col = key - continue - if value == 'energy': - engy_col = key - continue - if value == 'mass': - mass_col = key - continue + table : recarray + The database query as a recarray + + """ - # Get levels of categorial or metric data - if value == 'split': # Categorial - levels = np.unique(self.data_table.table[key]) - levels_str = [('==' , x.astype(levels.dtype)) for x in levels] - elif value == 'whole': - # Random string to minimize chance of overlap? - levels_str = [('==','whole')] - else: # Metric - - # TODO: Throw a warning if the data is not divisible by the - # divisions specified. - try: - dmin = self.data_table.meta[(key, 'minimum')] - dmax = self.data_table.meta[(key, 'maximum')] - dprec = self.data_table.meta[(key, 'precision')] - - # TODO: Error if step < prec - step = (dmax + dprec - dmin) / value - starts = np.arange(dmin, dmax + dprec, step) - ends = starts + step - except TypeError: - raise TypeError('Unable to proceed to with values ' + - 'obtained from metadata. 
Please check ' + - 'the metadata file and/or parameters file') - - - starts_str = [('>=', x) for x in starts] - ends_str = [('<', x) for x in ends] - levels_str = [list(lvl) for lvl in zip(starts_str, ends_str)] - - - # Add these levels to combinations dictionary - if len(combinations) == 0: # If first criteria - for i, level in enumerate(levels_str): - combinations.append({key: level}) - else: - temp_comb = [] - for i, level in enumerate(levels_str): - exist_recs = deepcopy(combinations) - for rec in exist_recs: - rec[key] = level - temp_comb += exist_recs - combinations = temp_comb + # Load table + if type == 'sql': + con = lite.connect(':memory:') + con.row_factory = lite.Row + cur = con.cursor() + + with open(data_path, 'r') as f: + sql = f.read() + + cur.executescript(sql) + + else: + con = lite.connect(data_path) + con.row_factory = lite.Row + cur = con.cursor() + + cur.execute(self.subset) + + # Check that table is not empty + db_info = cur.fetchall() + try: + col_names = db_info[0].keys() + except IndexError: + raise lite.OperationalError("Query %s to database %s is empty" % + (query_str, data_path)) + + # Convert objects to tuples + converted_info = [tuple(x) for x in db_info] + + # NOTE: Using default value for Unicode: Seems better than checking + # lengths. Should we keep the type as unicode? + dtypes=[type(x) if type(x) != unicode else 'S150' for x in db_info[0]] - if len(combinations) == 0: - combinations.append({}) + table = np.array(converted_info, dtype=zip(col_names, dtypes)) + con.commit() + con.close() + + # Return a recarray for consistency + # TODO: This should now be a pd.dataframe + return table.view(np.recarray) - return spp_list, spp_col, count_col, engy_col, mass_col, combinations +def _subset_table(full_table, subset): + """ + Return subtable matching all conditions in subset. - def sad(self, cols, splits, clean=False): - ''' - Calculates an empirical species abundance distribution given criteria. + Parameters + ---------- + full_table : dataframe + Entire data table + subset : str + String describing subset of data to use for analysis - Parameters - ---------- - cols : dict - Identifier with keys for columns to use for species ID (spp_col), - count (count_col), energy (energy_col), and mass (mass_col). Only - spp_col is mandatory. - criteria : dict - Keys for column names and value determining how to split column. - Value of 'split' divides into all unique values in column, - especially appropriate for categorical columns. Any other value is - evaluated as an integer giving the number of divisions of data - along this axis - clean : bool - If True, all the zeros are removed from the sads. If False, sads - are left as is. + Returns + ------- + dataframe + Subtable with records from table meeting requirements in subset - Returns - ------- - result : list - List of tuples containing results, where the first element is a - dictionary of criteria for this calculation and second element is a - 1D ndarray of length species containing the abundance for each - species. The third element is 1D array listing identifiers for - species in the same order as they appear in the second element of - result. 
- ''' - # TODO: Convert all methods to take cols separately - # TODO: Incorporate correct criteria syntax into parameters - # TODO: Ensure that all methods return a list of tuples where first - # element is comb and second is array of data that is the result - - # Define cols and spp_list for whole Patch - for col in ['spp_col', 'count_col', 'energy_col', 'mass_col']: - exec col + " = cols.get(col, None)" - spp_list = np.unique(self.data_table.table[spp_col]) - - if spp_col == None: - raise TypeError('No species column specified in "criteria" ' + - 'parameter') - _,_,_,_,_, combinations = \ - self.parse_criteria(splits) - - result = [] - for comb in combinations: - - subtable = self.data_table.get_subtable(comb) - - sad_list = [] - for species in spp_list: - spp_subtable = subtable[subtable[spp_col] == species] - if count_col: - count = np.sum(spp_subtable[count_col]) - else: - count = len(spp_subtable) - sad_list.append(count) + """ + if not subset: + return full_table + + conditions = subset.split(';') + + valid = np.ones(len(full_table), dtype=bool) + for condition in conditions: + this_valid = eval('full_table.'+condition) + valid = np.logical_and(valid, this_valid) - sad_list = np.array(sad_list) + return full_table[valid] - if clean: - ind = np.where(sad_list != 0)[0] - sad_list = sad_list[ind] - temp_spp_list = spp_list[ind] + +@doc_sub(metric_params, metric_return, cols_note, splits_note) +def sad(patch, cols, splits='', clean=True): + """ + Calculates an empirical species abundance distribution + + Parameters + ---------- + {0} + clean : bool + If True, all species with zero abundance are removed from SAD results + (relevant if splits is used and some splits are missing species). + Default False. + + Returns + ------- + {1} + Result has two columns: spp (species identifier) and y + (individuals of that species). + + + Notes + ----- + {2} + {3} + + """ + + # Get required variables + spp_col, count_col = ( + [cols.get(x, None) for x in ['spp_col', 'count_col']] ) + full_spp_list = ( + np.unique(patch.table[spp_col]) ) + + # Run analysis + result_list = [] + for substring, subtable in _yield_subtables(patch, splits): + + sad_list = [] + for spp in full_spp_list: + this_spp = (subtable[spp_col] == spp) + if count_col: + count = np.sum(subtable[count_col][this_spp]) else: - temp_spp_list = spp_list + count = np.sum(this_spp) + sad_list.append(count) + subdf = pd.DataFrame({'spp': full_spp_list, 'y': sad_list}) - result.append((comb, sad_list, temp_spp_list)) + if clean: + subdf = subdf[subdf['y'] > 0] - return result + result_list.append((substring, subdf)) - def ssad(self, criteria): - ''' - Calculates empirical species-level spatial abundance distributions - given criteria. + return result_list - Parameters - ---------- - criteria : dict - See Patch.sad docstring - Returns - ------- - : tuple - Returns a tuple with two objects. The first object is an array of - dicts that correspond to the criteria used to generate each cell. - The length of the first object in equal to the number of divisions - specified. The second object is a dictionary that has length - species and each keyword is a species. Each species keyword looks - up an array with the ssad for the given species. The array that - each keyword looks up is the same length as criteria. 
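As a rough usage sketch of the string-based interface introduced above (illustrative only, not part of the patch: the metadata path and the column names spp, count, x, y, and year are hypothetical, and the import assumes the rewritten module is importable as macroeco.empirical):

    from macroeco.empirical import Patch, sad

    # Hypothetical census: metadata file describing a csv table with
    # columns 'spp', 'count', 'x', 'y', and 'year'
    pat = Patch('data/census_metadata.txt', subset="year==2005")

    # One SAD per quadrant of the plot; sad() returns a list of
    # (subset_string, dataframe) tuples, each dataframe holding
    # columns 'spp' and 'y'
    results = sad(pat, {'spp_col': 'spp', 'count_col': 'count'},
                  splits='x:2; y:2', clean=True)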
+@doc_sub(metric_params, metric_return, cols_note, splits_note) +def ssad(patch, cols, splits=''): + """ + Calculates an empirical intra-specific spatial abundance distribution + Parameters + ---------- + {0} - ''' - sad_return = self.sad(criteria, clean=False) - spp_list = sad_return[0][2] - combs, array_res = flatten_sad(sad_return) - ssad = {} + Returns + ------- + {1} + Result has one column: y (individuals of species in each subplot). - for i, spp in enumerate(spp_list): - ssad[spp] = array_res[i,:] - return combs, ssad + Notes + ----- + The parameter `splits` is used differently in the SSAD than in the other + metrics. Here, `splits` should be used to define a set of cells over which + the abundance of each species will be evaluated. The SSAD will then return + a vector result for each species found in the patch, where the elements of + the vector are the abundances of the species in each subplot defined by the + `splits` parameter. + For example, when given the splits string "x:2, y:2", most metrics will + return 4 results, one for each cell, in which a multi-species analysis has + been performed. The SSAD will instead return S results, where S is the + number of species in patch, where each result is a vector of length 4 + giving the abundance of the species in each of the 4 subplots. - def sar(self, div_cols, div_list, criteria, form='sar', output_N=False): - ''' - Calculate an empirical species-area relationship given criteria. + {2} + {3} - Parameters - ---------- - div_cols : tuple - Column names to divide, eg, ('x', 'y'). Must be metric. - div_list : list of tuples - List of division pairs in same order as div_cols, eg, [(2,2), - (2,4), (4,4)]. Values are number of divisions of div_col. - criteria : dict - See docstring for EPatch.sad. Here, criteria SHOULD NOT include - items referring to div_cols (if there are any, they are ignored). - form : string - 'sar' or 'ear' for species or endemics area relationship. EAR is - relative to the subtable selected after criteria is applied. - output_N : bool - Adds the column N to the output rec array which contains the - average N for a given area. + """ - Returns - ------- - rec_sar: structured array - Returns a structured array with fields 'items' and 'area' that - contains the average items/species for each given area specified by - critieria. - full_result : list of ndarrays - List of same length as areas containing arrays with element for - count of species or endemics in each subpatch at corresponding - area. 
- ''' - - # If any element in div_cols in criteria, remove from criteria - criteria = {k: v for k, v in criteria.items() if k not in div_cols} - - # Loop through div combinations (ie, areas), calc sad, and summarize - areas = [] - mean_result = [] - full_result = [] - N_result = [] - - for div in div_list: - - # Add divs to criteria dict - this_criteria = deepcopy(criteria) - for i, col in enumerate(div_cols): - this_criteria[col] = div[i] - - # Get flattened sad for all criteria and this div - sad_return = self.sad(this_criteria) - - if output_N: - N_result.append(np.mean([sum(sad[1]) for sad in sad_return])) - - flat_sad = flatten_sad(sad_return)[1] - - # Store results - if form == 'sar': - this_full = np.sum((flat_sad > 0), axis=0) - this_mean = np.mean(this_full) - elif form == 'ear': - totcnt = np.sum(flat_sad, axis=1) - totcnt_arr = \ - np.array([list(totcnt),]*np.shape(flat_sad)[1]).transpose() - - this_full = np.sum(np.equal(flat_sad, totcnt_arr), axis=0) - this_mean = np.mean(this_full) - else: - raise NotImplementedError('No SAR of form %s available' % form) + sad_results = sad(patch, cols, splits, clean=False) + + if len(sad_results) == 1: + raise ValueError, ("SSAD requires patch to be split into more than " + "one subplot") - full_result.append(this_full) - mean_result.append(this_mean) + for i, sad_result in enumerate(sad_results): + if i == 0: # For first result, create dataframe + fulldf = sad_result[1] + fulldf.columns = ['spp', '0'] # Renames y col to 0 + else: # For other results, append col to dataframe + fulldf[str(i)] = sad_result[1]['y'] - # Store area - area = 1 - for i, col in enumerate(div_cols): - dmin = self.data_table.meta[(col, 'minimum')] - dmax = self.data_table.meta[(col, 'maximum')] - dprec = self.data_table.meta[(col, 'precision')] - length = (dmax + dprec - dmin) + result_list = [] + for row in fulldf.iterrows(): # Grab result for each species by row + row_values_array = np.array(row[1][1:], dtype=float) + result_list.append((row[1][0], pd.DataFrame({'y': row_values_array}))) - area *= length / div[i] + return result_list - areas.append(area) - # Return - if not output_N: - rec_sar = np.array(zip(mean_result, areas), dtype=[('items', - np.float), ('area', np.float)]) +def sar(self, div_cols, div_list, criteria, form='sar', output_N=False): + ''' + Calculate an empirical species-area relationship given criteria. + + Parameters + ---------- + div_cols : tuple + Column names to divide, eg, ('x', 'y'). Must be metric. + div_list : list of tuples + List of division pairs in same order as div_cols, eg, [(2,2), + (2,4), (4,4)]. Values are number of divisions of div_col. + criteria : dict + See docstring for EPatch.sad. Here, criteria SHOULD NOT include + items referring to div_cols (if there are any, they are ignored). + form : string + 'sar' or 'ear' for species or endemics area relationship. EAR is + relative to the subtable selected after criteria is applied. + output_N : bool + Adds the column N to the output rec array which contains the + average N for a given area. + + Returns + ------- + rec_sar: structured array + Returns a structured array with fields 'items' and 'area' that + contains the average items/species for each given area specified by + critieria. + full_result : list of ndarrays + List of same length as areas containing arrays with element for + count of species or endemics in each subpatch at corresponding + area. 
+ ''' + + # If any element in div_cols in criteria, remove from criteria + criteria = {k: v for k, v in criteria.items() if k not in div_cols} + + # Loop through div combinations (ie, areas), calc sad, and summarize + areas = [] + mean_result = [] + full_result = [] + N_result = [] + + for div in div_list: + + # Add divs to criteria dict + this_criteria = deepcopy(criteria) + for i, col in enumerate(div_cols): + this_criteria[col] = div[i] + + # Get flattened sad for all criteria and this div + sad_return = self.sad(this_criteria) + + if output_N: + N_result.append(np.mean([sum(sad[1]) for sad in sad_return])) + + flat_sad = flatten_sad(sad_return)[1] + + # Store results + if form == 'sar': + this_full = np.sum((flat_sad > 0), axis=0) + this_mean = np.mean(this_full) + elif form == 'ear': + totcnt = np.sum(flat_sad, axis=1) + totcnt_arr = \ + np.array([list(totcnt),]*np.shape(flat_sad)[1]).transpose() + + this_full = np.sum(np.equal(flat_sad, totcnt_arr), axis=0) + this_mean = np.mean(this_full) else: - rec_sar = np.array(zip(mean_result, N_result, areas), - dtype=[('items', np.float), ('N', np.float), ('area', np.float)]) + raise NotImplementedError('No SAR of form %s available' % form) - return rec_sar, full_result + full_result.append(this_full) + mean_result.append(this_mean) + # Store area + area = 1 + for i, col in enumerate(div_cols): + dmin = self.data_table.meta[(col, 'minimum')] + dmax = self.data_table.meta[(col, 'maximum')] + dprec = self.data_table.meta[(col, 'precision')] + length = (dmax + dprec - dmin) - def universal_sar(self, div_cols, div_list, criteria, include_full=False): - ''' - Calculates the empirical universal sar given criteria. The universal - sar calculates the slope of the SAR and the ratio of N / S at all - the areas in div_cols (where N is the total number of species and S is - the total number of species). + area *= length / div[i] - This function assumes that the div_list contains halvings. If they are - not, the function will still work but the results will be meaningless. - An example a of div_list with halvings is: + areas.append(area) - [(1,1), (1,2), (2,2), (2,4), (4,4)] + # Return + if not output_N: + rec_sar = np.array(zip(mean_result, areas), dtype=[('items', + np.float), ('area', np.float)]) + else: + rec_sar = np.array(zip(mean_result, N_result, areas), + dtype=[('items', np.float), ('N', np.float), ('area', np.float)]) - Parameters - ---------- - div_cols : tuple - Column names to divide, eg, ('x', 'y'). Must be metric. - div_list : list of tuples - List of division pairs in same order as div_cols, eg, [(2,2), - (2,4), (4,4)]. Values are number of divisions of div_col. - criteria : dict - See docstring for EPatch.sad. Here, criteria SHOULD NOT include - items referring to div_cols (if there are any, they are ignored). - include_full : bool - If include_full = True, the division (1,1) will be included if it - was now already included. Else it will not be included. (1,1) is - equivalent to the full plot + return rec_sar, full_result - Returns - ------- - z_array : a structured array - Has the columns names: - 'z' : slope of the SAR at the given area - 'S' : Number of species at the given division - 'N' : Number of individuals at the given division - 'N/S' : The ratio of N/S at the given division - - - Notes - ----- - If you give it n divisions in div_list you will get a structured array - back that has length n - 2. 
Therefore, if you only have one - ''' - - # If (1,1) is not included, include it - if include_full: - try: - div_list.index((1,1)) - except ValueError: - div_list.insert(0, (1,1)) - - # Run sar with the div_cols - sar = self.sar(div_cols, div_list, criteria, output_N=True)[0] - - # sort by area - sar = np.sort(sar, order=['area'])[::-1] - - # Calculate z's - if len(sar) >= 3: # Check the length of sar - z_list = [z(sar['items'][i - 1], sar['items'][i + 1]) for i in - np.arange(1, len(sar)) if sar['items'][i] != sar['items'][-1]] +def universal_sar(self, div_cols, div_list, criteria, include_full=False): + ''' + Calculates the empirical universal sar given criteria. The universal + sar calculates the slope of the SAR and the ratio of N / S at all + the areas in div_cols (where N is the total number of species and S is + the total number of species). + + This function assumes that the div_list contains halvings. If they are + not, the function will still work but the results will be meaningless. + An example a of div_list with halvings is: + + [(1,1), (1,2), (2,2), (2,4), (4,4)] + + Parameters + ---------- + div_cols : tuple + Column names to divide, eg, ('x', 'y'). Must be metric. + div_list : list of tuples + List of division pairs in same order as div_cols, eg, [(2,2), + (2,4), (4,4)]. Values are number of divisions of div_col. + criteria : dict + See docstring for EPatch.sad. Here, criteria SHOULD NOT include + items referring to div_cols (if there are any, they are ignored). + include_full : bool + If include_full = True, the division (1,1) will be included if it + was now already included. Else it will not be included. (1,1) is + equivalent to the full plot + + + Returns + ------- + z_array : a structured array + Has the columns names: + 'z' : slope of the SAR at the given area + 'S' : Number of species at the given division + 'N' : Number of individuals at the given division + 'N/S' : The ratio of N/S at the given division + + + Notes + ----- + If you give it n divisions in div_list you will get a structured array + back that has length n - 2. Therefore, if you only have one + ''' + + # If (1,1) is not included, include it + if include_full: + try: + div_list.index((1,1)) + except ValueError: + div_list.insert(0, (1,1)) + + # Run sar with the div_cols + sar = self.sar(div_cols, div_list, criteria, output_N=True)[0] + + # sort by area + sar = np.sort(sar, order=['area'])[::-1] + + # Calculate z's + if len(sar) >= 3: # Check the length of sar + z_list = [z(sar['items'][i - 1], sar['items'][i + 1]) for i in + np.arange(1, len(sar)) if sar['items'][i] != sar['items'][-1]] + else: + return np.empty(0, dtype=[('z', np.float), ('S', np.float), ('N', + np.float), ('N/S', np.float)]) + + N_over_S = sar['N'][1:len(sar) - 1] / sar['items'][1:len(sar) - 1] + + z_array = np.array(zip(z_list, sar['items'][1:len(sar) - 1], + sar['N'][1:len(sar) - 1], N_over_S), dtype=[('z', np.float), ('S', + np.float), ('N', np.float), ('N/S', np.float)]) + + return z_array + +def comm_sep(self, plot_locs, criteria, loc_unit=None): + ''' + Calculates commonality (Sorensen and Jaccard) between pairs of plots. + + Parameters + ---------- + plot_locs : dict + Dictionary with keys equal to each plot name, which must be + represented by a column in the data table, and values equal to a + tuple of the x and y coordinate of each plot + criteria : dict + See docstring for Patch.sad. + loc_unit : str + Unit of plot locations. Special cases include 'decdeg' (decimal + degrees), returns result in km. Otherwise ignored. 
+ + Returns + ------- + result: structured array + Returns a structured array with fields plot-a and plot-b (names of + two plots), dist (distance between plots), and sorensen and jaccard + (similarity indices). Has row for each unique pair of plots. + ''' + + # Set up sad_dict with key=plot and val=clean sad for that plot + sad_dict = {} + + # Loop through all plot cols, updating criteria, and getting spp_list + for plot in plot_locs.keys(): + + # Find current count col and remove it from criteria + for crit_key in criteria.keys(): + if criteria[crit_key] == 'count': + criteria.pop(crit_key, None) + + # Add this plot as col with counts + criteria[plot] = 'count' + + # Get SAD for existing criteria with this plot as count col + sad_return = self.sad(criteria, clean=True) + + # Check that sad_return only has one element, or throw error + if len(sad_return) > 1: + raise NotImplementedError('Too many criteria for comm_sep') + + # Get unique species list for this plot and store in sad_dict + sad_dict[plot] = sad_return[0][2] + + # Set up recarray to hold Sorensen index for all pairs of plots + n_pairs = np.sum(np.arange(len(plot_locs.keys()))) + result = np.recarray((n_pairs,), dtype=[('plot-a','S32'), + ('plot-b', 'S32'), + ('spp-a', int), + ('spp-b', int), + ('dist', float), + ('sorensen', float), + ('jaccard', float)]) + + # Loop through all combinations of plots and fill in result table + row = 0 + for pair in itertools.combinations(plot_locs.keys(), 2): + + # Names of plots + plota = pair[0] + plotb = pair[1] + + result[row]['plot-a'] = plota + result[row]['plot-b'] = plotb + + # Calculate inter-plot distance + if loc_unit == 'decdeg': + result[row]['dist'] = decdeg_distance(plot_locs[plota], + plot_locs[plotb]) else: - return np.empty(0, dtype=[('z', np.float), ('S', np.float), ('N', - np.float), ('N/S', np.float)]) + result[row]['dist'] = distance(plot_locs[plota], + plot_locs[plotb]) - N_over_S = sar['N'][1:len(sar) - 1] / sar['items'][1:len(sar) - 1] + # Get similarity indices + spp_a = len(sad_dict[plota]) + spp_b = len(sad_dict[plotb]) - z_array = np.array(zip(z_list, sar['items'][1:len(sar) - 1], - sar['N'][1:len(sar) - 1], N_over_S), dtype=[('z', np.float), ('S', - np.float), ('N', np.float), ('N/S', np.float)]) + result[row]['spp-a'] = spp_a + result[row]['spp-b'] = spp_b - return z_array + intersect = set(sad_dict[plota]).intersection(sad_dict[plotb]) + union = set(sad_dict[plota]).union(sad_dict[plotb]) - def comm_sep(self, plot_locs, criteria, loc_unit=None): - ''' - Calculates commonality (Sorensen and Jaccard) between pairs of plots. + # Fill in zero if denom is zero + if spp_a + spp_b == 0: + result[row]['sorensen'] = 0 + else: + result[row]['sorensen'] = (2*len(intersect)) / (spp_a+spp_b) - Parameters - ---------- - plot_locs : dict - Dictionary with keys equal to each plot name, which must be - represented by a column in the data table, and values equal to a - tuple of the x and y coordinate of each plot - criteria : dict - See docstring for Patch.sad. - loc_unit : str - Unit of plot locations. Special cases include 'decdeg' (decimal - degrees), returns result in km. Otherwise ignored. + if len(union) == 0: + result[row]['jaccard'] = 0 + else: + result[row]['jaccard'] = len(intersect) / len(union) - Returns - ------- - result: structured array - Returns a structured array with fields plot-a and plot-b (names of - two plots), dist (distance between plots), and sorensen and jaccard - (similarity indices). Has row for each unique pair of plots. 
- ''' - - # Set up sad_dict with key=plot and val=clean sad for that plot - sad_dict = {} - - # Loop through all plot cols, updating criteria, and getting spp_list - for plot in plot_locs.keys(): - - # Find current count col and remove it from criteria - for crit_key in criteria.keys(): - if criteria[crit_key] == 'count': - criteria.pop(crit_key, None) - - # Add this plot as col with counts - criteria[plot] = 'count' - - # Get SAD for existing criteria with this plot as count col - sad_return = self.sad(criteria, clean=True) - - # Check that sad_return only has one element, or throw error - if len(sad_return) > 1: - raise NotImplementedError('Too many criteria for comm_sep') - - # Get unique species list for this plot and store in sad_dict - sad_dict[plot] = sad_return[0][2] - - # Set up recarray to hold Sorensen index for all pairs of plots - n_pairs = np.sum(np.arange(len(plot_locs.keys()))) - result = np.recarray((n_pairs,), dtype=[('plot-a','S32'), - ('plot-b', 'S32'), - ('spp-a', int), - ('spp-b', int), - ('dist', float), - ('sorensen', float), - ('jaccard', float)]) - - # Loop through all combinations of plots and fill in result table - row = 0 - for pair in itertools.combinations(plot_locs.keys(), 2): - - # Names of plots - plota = pair[0] - plotb = pair[1] - - result[row]['plot-a'] = plota - result[row]['plot-b'] = plotb - - # Calculate inter-plot distance - if loc_unit == 'decdeg': - result[row]['dist'] = decdeg_distance(plot_locs[plota], - plot_locs[plotb]) - else: - result[row]['dist'] = distance(plot_locs[plota], - plot_locs[plotb]) + # Increment row counter + row += 1 - # Get similarity indices - spp_a = len(sad_dict[plota]) - spp_b = len(sad_dict[plotb]) + return result - result[row]['spp-a'] = spp_a - result[row]['spp-b'] = spp_b +def o_ring(self, div_cols, bin_edges, criteria, n0_min_max=None, + edge_correct=False, density=False): + ''' + Calculates univariate O-ring for a species. - intersect = set(sad_dict[plota]).intersection(sad_dict[plotb]) - union = set(sad_dict[plota]).union(sad_dict[plotb]) + Parameters + ---------- + div_cols : tuple + Column names containing x and y coordinates of individuals + bin_edges : iterable + List of edges of distance classes to bin histogram of distances + criteria : dict + See docstring for Patch.sad. Count column must be used. + n0_min_max : tuple + Optional min and max abundance for species to consider. Useful for + ignoring rare species with few samples and abundant species for + which calculation would take a long time. + edge_correct : bool + Correct histograms by replacing count of individuals at distance + bin with expected count if entire ring at that distance was + available (part of ring may fall outside of plot). Default False. + density : bool + If True, return densities (counts divided by area of torus defined + by bin edges) instead of counts. Default False. + + Returns + ------- + result : tuple + List of tuples with three elements each. First is combination used + to generate results, second is spp_list for that combination + (includes all species in entire landscape), and third is list of + length spp_list giving histogram of pairwise distances for each + species. + + Notes + ----- + Pairwise distances are directional, giving n(n-1) total distances, as + edge correction is directional. + + If there are no records in a combination, histogram will be None. If + there are records but a species has only one individual, histogram + will be all zeros. 
+ + When using edge_correct or density, the maximum distance used for edge + correction, given by the mean of the last two bin_edge values, should + be no greater than one half the longer dimension of the plot. This + ensures that it is not possible for an entire edge correction buffer + to be outside of the plot, which could lead to divide by zero errors. - # Fill in zero if denom is zero - if spp_a + spp_b == 0: - result[row]['sorensen'] = 0 - else: - result[row]['sorensen'] = (2*len(intersect)) / (spp_a+spp_b) + ''' - if len(union) == 0: - result[row]['jaccard'] = 0 - else: - result[row]['jaccard'] = len(intersect) / len(union) + spp_list, spp_col, count_col, engy_col, mass, combinations = \ + self.parse_criteria(criteria) - # Increment row counter - row += 1 + bin_edges = np.array(bin_edges) - return result + result_list = [] - def o_ring(self, div_cols, bin_edges, criteria, n0_min_max=None, - edge_correct=False, density=False): - ''' - Calculates univariate O-ring for a species. + for comb in combinations: - Parameters - ---------- - div_cols : tuple - Column names containing x and y coordinates of individuals - bin_edges : iterable - List of edges of distance classes to bin histogram of distances - criteria : dict - See docstring for Patch.sad. Count column must be used. - n0_min_max : tuple - Optional min and max abundance for species to consider. Useful for - ignoring rare species with few samples and abundant species for - which calculation would take a long time. - edge_correct : bool - Correct histograms by replacing count of individuals at distance - bin with expected count if entire ring at that distance was - available (part of ring may fall outside of plot). Default False. - density : bool - If True, return densities (counts divided by area of torus defined - by bin edges) instead of counts. Default False. + # If comb includes division, cannot also use edge correction + # This would require better parsing of plot boundaries for division + if (not comb.keys() == []) and edge_correct: + raise NotImplementedError("Edge correction cannot be used " + "with combinations.") - Returns - ------- - result : tuple - List of tuples with three elements each. First is combination used - to generate results, second is spp_list for that combination - (includes all species in entire landscape), and third is list of - length spp_list giving histogram of pairwise distances for each - species. + # Get appropriate subtable for this combination + subtable = self.data_table.get_subtable(comb) - Notes - ----- - Pairwise distances are directional, giving n(n-1) total distances, as - edge correction is directional. + # Declare empty list for all histograms for all species + spp_hist_list = [] - If there are no records in a combination, histogram will be None. If - there are records but a species has only one individual, histogram - will be all zeros. + # If density is True, set edge_correct to True + if density: + edge_correct = True - When using edge_correct or density, the maximum distance used for edge - correction, given by the mean of the last two bin_edge values, should - be no greater than one half the longer dimension of the plot. This - ensures that it is not possible for an entire edge correction buffer - to be outside of the plot, which could lead to divide by zero errors. 
+ # Set up plot polygon for edge correction + if edge_correct: + xmin = self.data_table.meta[(div_cols[0], 'minimum')] + xmax = self.data_table.meta[(div_cols[0], 'maximum')] + ymin = self.data_table.meta[(div_cols[1], 'minimum')] + ymax = self.data_table.meta[(div_cols[1], 'maximum')] - ''' + plot = geo.box(xmin, ymin, xmax, ymax) - spp_list, spp_col, count_col, engy_col, mass, combinations = \ - self.parse_criteria(criteria) + all_r = (bin_edges[:-1] + bin_edges[1:]) / 2 - bin_edges = np.array(bin_edges) + # Calculate areas of all toruses + if density: + ring_areas = [] + for i in range(len(bin_edges) - 1): + ring_areas.append(np.pi*(bin_edges[i+1]**2 - + bin_edges[i]**2)) + ring_areas = np.array(ring_areas) - result_list = [] + # Loop all species + for spp in spp_list: - for comb in combinations: + spp_subtable = subtable[subtable[spp_col] == spp] - # If comb includes division, cannot also use edge correction - # This would require better parsing of plot boundaries for division - if (not comb.keys() == []) and edge_correct: - raise NotImplementedError("Edge correction cannot be used " - "with combinations.") + # If spp not present or singleton, continue + # Ensure that if single record but count > 1, do analysis + if len(spp_subtable) == 0: + spp_hist_list.append(None) + continue - # Get appropriate subtable for this combination - subtable = self.data_table.get_subtable(comb) + # Get n0, accounting for count col + if count_col: + count = np.sum(spp_subtable[count_col]) + else: + count = len(spp_subtable) - # Declare empty list for all histograms for all species - spp_hist_list = [] + # Skip this spp if there is a min_max set and n0 out of range + if n0_min_max and (count < n0_min_max[0] or count > + n0_min_max[1]): + spp_hist_list.append(None) + continue - # If density is True, set edge_correct to True + # Get list of all points and all counts + x = spp_subtable[div_cols[0]] + y = spp_subtable[div_cols[1]] + all_points = zip(x,y) + all_counts = list(spp_subtable[count_col]) + + # Declare array to hold histogram of pairwise distances + all_hist = np.zeros(len(bin_edges) - 1) + + # Declare array to hold all sampled areas per bin if density: - edge_correct = True + all_areas = np.zeros(len(ring_areas)) - # Set up plot polygon for edge correction - if edge_correct: - xmin = self.data_table.meta[(div_cols[0], 'minimum')] - xmax = self.data_table.meta[(div_cols[0], 'maximum')] - ymin = self.data_table.meta[(div_cols[1], 'minimum')] - ymax = self.data_table.meta[(div_cols[1], 'maximum')] + # Go through all_points + for i, this_point in enumerate(all_points): - plot = geo.box(xmin, ymin, xmax, ymax) + # Get this point and remove from list of all points + this_count = all_counts[i] - all_r = (bin_edges[:-1] + bin_edges[1:]) / 2 + # Create list of all other points and counts except this + all_other_points = all_points[0:i] + all_points[i+1:] + all_other_counts = all_counts[0:i] + all_counts[i+1:] - # Calculate areas of all toruses - if density: - ring_areas = [] - for i in range(len(bin_edges) - 1): - ring_areas.append(np.pi*(bin_edges[i+1]**2 - - bin_edges[i]**2)) - ring_areas = np.array(ring_areas) - - # Loop all species - for spp in spp_list: - - spp_subtable = subtable[subtable[spp_col] == spp] - - # If spp not present or singleton, continue - # Ensure that if single record but count > 1, do analysis - if len(spp_subtable) == 0: - spp_hist_list.append(None) - continue - - # Get n0, accounting for count col - if count_col: - count = np.sum(spp_subtable[count_col]) + # Get dist from this point 
to all other points + # If no other points, other_dist is empty + # May still be other individs at this point + if all_other_points: + other_dist = dist.cdist(np.array([this_point]), + np.array(all_other_points)) else: - count = len(spp_subtable) + other_dist = np.array(()) + + # Repeat other point distances to acccount for their counts + other_dist = np.repeat(other_dist, all_other_counts) + + # Repeat entire other_dist to account for count here + other_dist = np.tile(other_dist, this_count) + + # Add 0 distances between individs at this point + # Multiplied by two to get directional pairwise dists + n_this_dists = this_count - 1 + if n_this_dists > 0: + other_dist = np.concatenate((other_dist, + np.zeros(n_this_dists*2))) + + # Calculate histogram of distances to other points + hist, _ = np.histogram(other_dist, bin_edges) + + # Edge correct distance + if edge_correct: + corr_fact = np.zeros(len(all_r)) + for i, r in enumerate(all_r): + x, y = this_point + circ = geo.Point(x,y).buffer(r,resolution=64) + out_len = circ.boundary.difference(plot).length + in_frac = ((circ.boundary.length - out_len) / + circ.boundary.length) + corr_fact[i] = in_frac + hist = hist / corr_fact + + # Store sampled area at each dist for density calculation + if density: + all_areas += (ring_areas * corr_fact) - # Skip this spp if there is a min_max set and n0 out of range - if n0_min_max and (count < n0_min_max[0] or count > - n0_min_max[1]): - spp_hist_list.append(None) - continue + # Add this point results to main histogram + all_hist += hist - # Get list of all points and all counts - x = spp_subtable[div_cols[0]] - y = spp_subtable[div_cols[1]] - all_points = zip(x,y) - all_counts = list(spp_subtable[count_col]) + # If density, divide all values by summed sampled torus areas + if density: + all_hist = all_hist / all_areas - # Declare array to hold histogram of pairwise distances - all_hist = np.zeros(len(bin_edges) - 1) + # Append final hist for this species to running list + spp_hist_list.append(all_hist) - # Declare array to hold all sampled areas per bin - if density: - all_areas = np.zeros(len(ring_areas)) - - # Go through all_points - for i, this_point in enumerate(all_points): - - # Get this point and remove from list of all points - this_count = all_counts[i] - - # Create list of all other points and counts except this - all_other_points = all_points[0:i] + all_points[i+1:] - all_other_counts = all_counts[0:i] + all_counts[i+1:] - - # Get dist from this point to all other points - # If no other points, other_dist is empty - # May still be other individs at this point - if all_other_points: - other_dist = dist.cdist(np.array([this_point]), - np.array(all_other_points)) - else: - other_dist = np.array(()) - - # Repeat other point distances to acccount for their counts - other_dist = np.repeat(other_dist, all_other_counts) - - # Repeat entire other_dist to account for count here - other_dist = np.tile(other_dist, this_count) - - # Add 0 distances between individs at this point - # Multiplied by two to get directional pairwise dists - n_this_dists = this_count - 1 - if n_this_dists > 0: - other_dist = np.concatenate((other_dist, - np.zeros(n_this_dists*2))) - - # Calculate histogram of distances to other points - hist, _ = np.histogram(other_dist, bin_edges) - - # Edge correct distance - if edge_correct: - corr_fact = np.zeros(len(all_r)) - for i, r in enumerate(all_r): - x, y = this_point - circ = geo.Point(x,y).buffer(r,resolution=64) - out_len = circ.boundary.difference(plot).length - in_frac = 
((circ.boundary.length - out_len) / - circ.boundary.length) - corr_fact[i] = in_frac - hist = hist / corr_fact - - # Store sampled area at each dist for density calculation - if density: - all_areas += (ring_areas * corr_fact) - - # Add this point results to main histogram - all_hist += hist - - # If density, divide all values by summed sampled torus areas - if density: - all_hist = all_hist / all_areas + # For this comb, create and append tuple to result list + result_list.append((comb, spp_list, spp_hist_list)) - # Append final hist for this species to running list - spp_hist_list.append(all_hist) + return result_list - # For this comb, create and append tuple to result list - result_list.append((comb, spp_list, spp_hist_list)) - return result_list +def ied(self, criteria, normalize=True, exponent=0.75): + ''' + Calculates the individual energy distribution for the entire community + given the criteria + Parameters + ---------- + criteria : dict + Dictionary must have contain a key with the value 'energy'. See + sad method for further requirements. + normalize : bool + If True, this distribution is normalized by dividing by the lowest + energy value within each element of criteria. If False, returns raw + energy values. + exponent : float + The exponent of the allometric scaling relationship if energy is + calculated from mass. + + Returns + ------- + result : list + List of tuples containing results, where first element is + dictionary of criteria for this calculation and second element is a + 1D ndarray containing the energy measurement of each individual in + the subset. The third element is the full (not unique) species + list for the given criteria. + + Notes + ----- + If count_col is None or is all ones, the entire energy column for each + subtable is returned. Else, the average energy per individual, + repeated for each individual is returned. This is equivalent to the psi + distribution from Harte (2011). - def ied(self, criteria, normalize=True, exponent=0.75): - ''' - Calculates the individual energy distribution for the entire community - given the criteria - Parameters - ---------- - criteria : dict - Dictionary must have contain a key with the value 'energy'. See - sad method for further requirements. - normalize : bool - If True, this distribution is normalized by dividing by the lowest - energy value within each element of criteria. If False, returns raw - energy values. - exponent : float - The exponent of the allometric scaling relationship if energy is - calculated from mass. + ''' - Returns - ------- - result : list - List of tuples containing results, where first element is - dictionary of criteria for this calculation and second element is a - 1D ndarray containing the energy measurement of each individual in - the subset. The third element is the full (not unique) species - list for the given criteria. - - Notes - ----- - If count_col is None or is all ones, the entire energy column for each - subtable is returned. Else, the average energy per individual, - repeated for each individual is returned. This is equivalent to the psi - distribution from Harte (2011). 
- - - ''' - - spp_list, spp_col, count_col, engy_col, mass_col, combinations = \ - self.parse_criteria(criteria) - - if engy_col == None and mass_col == None: - raise ValueError("No energy or mass column given") - elif engy_col == None and mass_col != None: - mass = True - this_engy = mass_col - else: - mass = False - this_engy = engy_col + spp_list, spp_col, count_col, engy_col, mass_col, combinations = \ + self.parse_criteria(criteria) - result = [] - for comb in combinations: + if engy_col == None and mass_col == None: + raise ValueError("No energy or mass column given") + elif engy_col == None and mass_col != None: + mass = True + this_engy = mass_col + else: + mass = False + this_engy = engy_col - subtable = self.data_table.get_subtable(comb) + result = [] + for comb in combinations: - # If all counts are not 1 - if count_col and (not np.all(subtable[count_col] == 1)): + subtable = self.data_table.get_subtable(comb) - # Remove any zero counts - subtable = subtable[subtable[count_col] != 0] - # Convert counts to ints - temp_counts = subtable[count_col].astype(int) + # If all counts are not 1 + if count_col and (not np.all(subtable[count_col] == 1)): - energy = np.repeat((subtable[this_engy] / - subtable[count_col]), temp_counts) - species = np.repeat(subtable[spp_col], temp_counts) - else: - energy = subtable[this_engy] - species = subtable[spp_col] + # Remove any zero counts + subtable = subtable[subtable[count_col] != 0] + # Convert counts to ints + temp_counts = subtable[count_col].astype(int) + + energy = np.repeat((subtable[this_engy] / + subtable[count_col]), temp_counts) + species = np.repeat(subtable[spp_col], temp_counts) + else: + energy = subtable[this_engy] + species = subtable[spp_col] - # Convert mass to energy if mass is True - if mass: - energy = (energy ** exponent) + # Convert mass to energy if mass is True + if mass: + energy = (energy ** exponent) - # Normalizing energy - if normalize: - energy = energy / np.min(energy) - result.append((comb, energy, species)) + # Normalizing energy + if normalize: + energy = energy / np.min(energy) + result.append((comb, energy, species)) - return result + return result - def sed(self, criteria, normalize=True, exponent=0.75, clean=False): - ''' - Calculates the species-level energy distribution for each given species - in the community. +def sed(self, criteria, normalize=True, exponent=0.75, clean=False): + ''' + Calculates the species-level energy distribution for each given species + in the community. - Parameters - ---------- - criteria : dict - Dictionary must have contain a key with the value 'energy' or - 'mass'. See sad method for further requirements. - normalize : bool - If True, this distribution is normalized by dividing by the lowest - energy value within each element of criteria. If False, returns raw - energy values. - exponent : float - The exponent of the allometric scaling relationship if energy is - calculated from mass - clean : bool - If False, sed dictionary contains all species. If True, species - with no individuals are removed. This is useful when subsetting. + Parameters + ---------- + criteria : dict + Dictionary must have contain a key with the value 'energy' or + 'mass'. See sad method for further requirements. + normalize : bool + If True, this distribution is normalized by dividing by the lowest + energy value within each element of criteria. If False, returns raw + energy values. 
+ exponent : float + The exponent of the allometric scaling relationship if energy is + calculated from mass + clean : bool + If False, sed dictionary contains all species. If True, species + with no individuals are removed. This is useful when subsetting. + + Returns + ------- + result : list of tuples + Each tuple contains two objects. The first object is a dict with + the division specifications that generated the given species energy + distributions. The second object is a dict with a keyword + corresponding to each species in the spp_list. Each species + keyword looks up a np.array that contains the given species + energy distribution. + + Notes + ----- + The theta distribution from Harte (2011) is a an sed. - Returns - ------- - result : list of tuples - Each tuple contains two objects. The first object is a dict with - the division specifications that generated the given species energy - distributions. The second object is a dict with a keyword - corresponding to each species in the spp_list. Each species - keyword looks up a np.array that contains the given species - energy distribution. - - Notes - ----- - The theta distribution from Harte (2011) is a an sed. - - ''' - spp_list, spp_col, count_col, engy_col, mass_col, combinations = \ - self.parse_criteria(criteria) - - ied = self.ied(criteria, normalize=normalize, exponent=exponent) - - result = [] - for this_ied in ied: - this_criteria_sed = {} - - for spp in spp_list: - spp_ind = (spp == this_ied[2]) - this_spp_sed = this_ied[1][spp_ind] - - if clean: # If True, don't add empty species lists - if len(this_spp_sed) > 0: - this_criteria_sed[spp] = this_spp_sed - else: + ''' + spp_list, spp_col, count_col, engy_col, mass_col, combinations = \ + self.parse_criteria(criteria) + + ied = self.ied(criteria, normalize=normalize, exponent=exponent) + + result = [] + for this_ied in ied: + this_criteria_sed = {} + + for spp in spp_list: + spp_ind = (spp == this_ied[2]) + this_spp_sed = this_ied[1][spp_ind] + + if clean: # If True, don't add empty species lists + if len(this_spp_sed) > 0: this_criteria_sed[spp] = this_spp_sed + else: + this_criteria_sed[spp] = this_spp_sed - result.append((this_ied[0], this_criteria_sed)) + result.append((this_ied[0], this_criteria_sed)) - return result + return result - def ased(self, criteria, normalize=True, exponent=0.75): - ''' - Calculates the average species energy distribution for each given - species in a subset. +def ased(self, criteria, normalize=True, exponent=0.75): + ''' + Calculates the average species energy distribution for each given + species in a subset. - Parameters - ---------- - criteria : dict - Dictionary must have contain a key with the value 'energy' or - 'mass'. See sad method for further requirements. + Parameters + ---------- + criteria : dict + Dictionary must have contain a key with the value 'energy' or + 'mass'. See sad method for further requirements. + + Returns + ------- + result : list + List of tuples containing results, where the first element is a + dictionary of criteria for this calculation and second element is a + 1D ndarray of length species containing the average energy for each + species. The third element is 1D array listing identifiers for + species in the same order as they appear in the second element of + result. 
+ + Notes + ----- + This is equivalent to the nu distribution from Harte 2011 - Returns - ------- - result : list - List of tuples containing results, where the first element is a - dictionary of criteria for this calculation and second element is a - 1D ndarray of length species containing the average energy for each - species. The third element is 1D array listing identifiers for - species in the same order as they appear in the second element of - result. + ''' - Notes - ----- - This is equivalent to the nu distribution from Harte 2011 + sed = self.sed(criteria, normalize=normalize, exponent=exponent) - ''' + result = [] + for this_sed in sed: + spp_list = list(this_sed[1].viewkeys()) + spp_list.sort() - sed = self.sed(criteria, normalize=normalize, exponent=exponent) + # Take the mean energy for each species + nu = [np.mean(this_sed[1][spp]) for spp in spp_list if + len(this_sed[1][spp]) != 0] + # Truncated spp_list if necessary + spp_list = [spp for spp in spp_list if len(this_sed[1][spp]) != 0] - result = [] - for this_sed in sed: - spp_list = list(this_sed[1].viewkeys()) - spp_list.sort() + result.append((this_sed[0], np.array(nu), np.array(spp_list))) + + return result + +def tsed(self, criteria, normalize=True, exponent=0.75): + ''' + Calculates the total species energy distribution for each given + species in a subset. + + Parameters + ---------- + criteria : dict + Dictionary must have contain a key with the value 'energy' or + 'mass'. See sad method for further requirements. + + Returns + ------- + result : list + List of tuples containing results, where the first element is a + dictionary of criteria for this calculation and second element is a + 1D ndarray of length species containing the average energy for each + species. The third element is 1D array listing identifiers for + species in the same order as they appear in the second element of + result. - # Take the mean energy for each species - nu = [np.mean(this_sed[1][spp]) for spp in spp_list if - len(this_sed[1][spp]) != 0] - # Truncated spp_list if necessary - spp_list = [spp for spp in spp_list if len(this_sed[1][spp]) != 0] + ''' - result.append((this_sed[0], np.array(nu), np.array(spp_list))) + sed = self.sed(criteria, normalize=normalize, exponent=exponent) - return result + result = [] + for this_sed in sed: + spp_list = list(this_sed[1].viewkeys()) + spp_list.sort() - def tsed(self, criteria, normalize=True, exponent=0.75): - ''' - Calculates the total species energy distribution for each given - species in a subset. + # Take the mean energy for each species + omega = [np.sum(this_sed[1][spp]) for spp in spp_list if + len(this_sed[1][spp]) != 0] + # Truncated spp_list if necessary + spp_list = [spp for spp in spp_list if len(this_sed[1][spp]) != 0] - Parameters - ---------- - criteria : dict - Dictionary must have contain a key with the value 'energy' or - 'mass'. See sad method for further requirements. - - Returns - ------- - result : list - List of tuples containing results, where the first element is a - dictionary of criteria for this calculation and second element is a - 1D ndarray of length species containing the average energy for each - species. The third element is 1D array listing identifiers for - species in the same order as they appear in the second element of - result. 
- - ''' - - sed = self.sed(criteria, normalize=normalize, exponent=exponent) - - result = [] - for this_sed in sed: - spp_list = list(this_sed[1].viewkeys()) - spp_list.sort() - - # Take the mean energy for each species - omega = [np.sum(this_sed[1][spp]) for spp in spp_list if - len(this_sed[1][spp]) != 0] - # Truncated spp_list if necessary - spp_list = [spp for spp in spp_list if len(this_sed[1][spp]) != 0] - - result.append((this_sed[0], np.array(omega), np.array(spp_list))) + result.append((this_sed[0], np.array(omega), np.array(spp_list))) - return result + return result def flatten_sad(sad): @@ -1047,3 +1157,100 @@ def z(doubleS, halfS): '''Calculates the z for a double S value and a half S value''' return np.log(doubleS / halfS) / (2 * np.log(2)) + + + + + + +@doc_sub(splits_note) +def _yield_subtables(patch, splits): + """ + Iterator for subtables defined by a splits string + + Parameters + ---------- + patch : obj + Patch object containing data to subset + splits : str + Specifies how a column of a dataset should be split. See Notes. + + Yields + ------ + tuple + First element is subset string, second is subtable dataframe + + Notes + ----- + {0} + + """ + + if splits: + subset_list = _parse_splits(patch, splits) + for subset in subset_list: + yield subset, _subset_table(patch.table, subset) + else: + yield '', patch.table + + +@doc_sub(splits_note) +def _parse_splits(patch, splits): + """ + Parse splits string to get list of all associated subset strings. + + Parameters + ---------- + patch : obj + Patch object containing data to subset + splits : str + Specifies how a column of a dataset should be split. See Notes. + + Returns + ------- + list + List of subset strings derived from splits string + + Notes + ----- + {0} + + """ + + split_list = splits.split(';') # Split commands for each col separate + subset_list = [] # List of all subset strings + + for split in split_list: + col, val = split.split(':') + + if val == 'split': + level_list = [col + '==' + str(x) + ';' + for x in np.unique(patch.table[col])] + else: + col_min = np.min(patch.table[col]) + col_max = np.max(patch.table[col]) + step = (col_max - col_min) / eval(val) + starts = np.arange(col_min, col_max, step) + ends = starts + step + level_list = [col + '>=' + str(x) + '; ' + col + '<' + str(y)+';' + for x, y in zip(starts, ends)] + + subset_list.append(level_list) + + # Get product of all string levels as list, conv to string, drop final ; + return [''.join(x)[:-1] for x in _product(*subset_list)] + + +def _product(*args, **kwds): + """ + Generates cartesian product of lists given as arguments + + From itertools.product documentation + """ + + pools = map(tuple, args) * kwds.get('repeat', 1) + result = [[]] + for pool in pools: + result = [x+[y] for x in result for y in pool] + return result + From 7f065bee779b957bdde25958ec6906c55ff72379 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 19:39:31 -0700 Subject: [PATCH 075/343] Add pandas, configparser to requirements, mock mpltools --- doc/conf.py | 25 +++++++++++++++++++++++++ doc/requirements.txt | 2 ++ 2 files changed, 27 insertions(+) diff --git a/doc/conf.py b/doc/conf.py index 938b322..7322dc2 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -94,6 +94,31 @@ # A list of ignored prefixes for module index sorting. 
#modindex_common_prefix = [] +class Mock(object): + + __all__ = [] + + def __init__(self, *args, **kwargs): + pass + + def __call__(self, *args, **kwargs): + return Mock() + + @classmethod + def __getattr__(cls, name): + if name in ('__file__', '__path__'): + return '/dev/null' + elif name[0] == name[0].upper(): + mockType = type(name, (), {}) + mockType.__module__ = __name__ + return mockType + else: + return Mock() + +MOCK_MODULES = ['mpltools'] +for mod_name in MOCK_MODULES: + sys.modules[mod_name] = Mock() + # -- Options for HTML output --------------------------------------------------- diff --git a/doc/requirements.txt b/doc/requirements.txt index 40de587..ed44683 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,6 +1,8 @@ scipy numpy matplotlib +pandas numpydoc shapely +configparser macroeco From f063524fbff6974f8e6aa2f8e3940c093561865c Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 20:27:29 -0700 Subject: [PATCH 076/343] Remove extraneous splits doc from ssad --- macroeco/empirical.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/macroeco/empirical.py b/macroeco/empirical.py index 8a59ab6..c9a85aa 100644 --- a/macroeco/empirical.py +++ b/macroeco/empirical.py @@ -347,19 +347,6 @@ def ssad(patch, cols, splits=''): Notes ----- - The parameter `splits` is used differently in the SSAD than in the other - metrics. Here, `splits` should be used to define a set of cells over which - the abundance of each species will be evaluated. The SSAD will then return - a vector result for each species found in the patch, where the elements of - the vector are the abundances of the species in each subplot defined by the - `splits` parameter. - - For example, when given the splits string "x:2, y:2", most metrics will - return 4 results, one for each cell, in which a multi-species analysis has - been performed. The SSAD will instead return S results, where S is the - number of species in patch, where each result is a vector of length 4 - giving the abundance of the species in each of the 4 subplots. 
- {2} {3} From 0fea975002f03fe893433d24d124d4ecea4564a8 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 21:04:13 -0700 Subject: [PATCH 077/343] Clean up whitespace in distributions2 docdict formatting --- macroeco/distributions2.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/macroeco/distributions2.py b/macroeco/distributions2.py index 7bdaeb9..7581ed3 100644 --- a/macroeco/distributions2.py +++ b/macroeco/distributions2.py @@ -103,8 +103,7 @@ def _doc(func): _docdict_allmeth = _docdict_allmeth.replace(', **kwds','') # Create docstring helpers -docdict['before_notes'] = ''.join([_docdict_allmeth, - docdict['callparams']]) +docdict['before_notes'] = ''.join([_docdict_allmeth,docdict['callparams']]) docdict_discrete['before_notes'] = ''.join([_docdict_discrete_allmeth, docdict['callparams']]) From 8376301722027e48eecbc4f2b608c57fcb12b396 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 21:16:49 -0700 Subject: [PATCH 078/343] Correct error in geom_uptrunc - solver gives variable raised to k, which is (1-p) not p --- macroeco/distributions2.py | 17 +++++++++-------- macroeco/test_distributions2.py | 16 +++++++++------- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/macroeco/distributions2.py b/macroeco/distributions2.py index 7581ed3..29bc981 100644 --- a/macroeco/distributions2.py +++ b/macroeco/distributions2.py @@ -348,8 +348,8 @@ class geom_uptrunc_gen(rv_discrete_meco): limit. This distribution is known as the Pi distribution in the MaxEnt Theory of - Ecology [#]_, where the ``p`` parameter is known as ``exp(-lambda)``. The - special case of a uniform pmf has been described as HEAP [#]_. + Ecology [#]_, where the ``p`` parameter is given by ``1 - exp(-lambda)``. + The special case of a uniform pmf has been described as HEAP [#]_. References ---------- @@ -384,8 +384,8 @@ def fit2(self, data, b=None): return _geom_solve_p_from_mu_vect(mu, b), b def _argcheck(self, p, b): - # Unlike the traditional geometric, p can be > 0 - return (p >= 0) + # Unlike the traditional geometric, p can be < 0 + return (p <= 1) def _pmf(self, x, p, b): pmf = (1.0-p)**x * p / (1.0-(1.0-p)**(b+1)) @@ -413,11 +413,12 @@ def _geom_solve_p_from_mu(mu, b): Ref: Harte 2011, Oxford U Press. Eq. 7.50. 
""" - def p_eq(p, mu, b): - p, mu, b = Decimal(p), Decimal(mu), Decimal(b) - return ( (p / (1 - p)) - ((b + 1) / (p**-b - 1)) - mu ) + def p_eq(x, mu, b): + x, mu, b = Decimal(x), Decimal(mu), Decimal(b) + return ( (x / (1 - x)) - ((b + 1) / (x**-b - 1)) - mu ) - return optim.brentq(p_eq, 1e-9, 20, args=(mu, b), disp=True) + # x here is the param raised to the k power, or 1 - p + return 1 - optim.brentq(p_eq, 1e-9, 20, args=(mu, b), disp=True) _geom_solve_p_from_mu_vect = np.vectorize(_geom_solve_p_from_mu) diff --git a/macroeco/test_distributions2.py b/macroeco/test_distributions2.py index 29d93d9..2024527 100644 --- a/macroeco/test_distributions2.py +++ b/macroeco/test_distributions2.py @@ -70,7 +70,7 @@ def test_translate_args_harte_16(self): # From Harte 2011, Oxford U Press, Tab 7.4, n0=16 row, Eq 7.50 b = 16 mu = np.array([2, 1]) # A0/8, A0/16 - expected = np.array([0.669, 0.500]) + expected = np.array([1-0.669, 1-0.500]) ps, _ = geom_uptrunc.translate_args(mu, b) assert_almost_equal(ps, expected, decimal=3) @@ -78,7 +78,7 @@ def test_translate_args_harte_32(self): # From Harte 2011, Oxford U Press, Tab 7.4, n0=32 row, Eq 7.50 b = 32 mu = np.array([4, 2]) # A0/8, A0/16 - expected = np.array([0.801, 0.667]) + expected = np.array([1-0.801, 1-0.667]) ps, _ = geom_uptrunc.translate_args(mu, b) assert_almost_equal(ps, expected, decimal=3) @@ -86,23 +86,25 @@ def test_translate_args_mqwilber_hand_calc(self): # TODO: Confirm last 4 of tests, which more accurate b = np.array([60, 340, 34]) mu = np.array([60*.1, 340*.6, 34*.9]) - expected = np.array([.8572, 1.0036, 1.2937]) + expected = np.array([1-.8572, 1-1.0036, 1-1.2937]) ps, _ = geom_uptrunc.translate_args(mu, b) assert_almost_equal(ps, expected, decimal=3) def test_translate_args_with_sum_of_pmf(self): p1, b1 = geom_uptrunc.translate_args(341/4, 341) # Issue 33 - assert_array_almost_equal(1,np.sum(geom_uptrunc.pmf(range(101),p1,b1))) + assert_array_almost_equal(1,np.sum(geom_uptrunc.pmf(range(342),p1,b1))) p2, b2 = geom_uptrunc.translate_args(120, 200) # Arbitrary - assert_array_almost_equal(1,np.sum(geom_uptrunc.pmf(range(101),p2,b2))) + print p2, b2 + print (geom_uptrunc.pmf(range(201),p2,b2)) + assert_array_almost_equal(1,np.sum(geom_uptrunc.pmf(range(201),p2,b2))) def test_fit2(self): p1, _ = geom_uptrunc.fit2([0,10], 10) - assert_almost_equal(p1, 1) + assert_almost_equal(p1, 0) p2, _ = geom_uptrunc.fit2([1,3], 16) - assert_almost_equal(p2, 0.669, decimal=3) + assert_almost_equal(p2, 1-0.669, decimal=2) class TestNbinom(TestCase): From e32a44ecb626899905e03201919f69768272d283 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 21:17:13 -0700 Subject: [PATCH 079/343] If geom_uptrunc result is one number, return as float not array --- macroeco/distributions2.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/macroeco/distributions2.py b/macroeco/distributions2.py index 29bc981..b7b3b48 100644 --- a/macroeco/distributions2.py +++ b/macroeco/distributions2.py @@ -381,7 +381,12 @@ def fit2(self, data, b=None): mu = np.mean(data) if not b: b = np.sum(data) - return _geom_solve_p_from_mu_vect(mu, b), b + p = _geom_solve_p_from_mu_vect(mu, b) + + if len(np.atleast_1d(p)) == 1: # Just return float, not len 1 array + return float(p), b + else: + return p, b def _argcheck(self, p, b): # Unlike the traditional geometric, p can be < 0 From 8638405479978cbe41a7231027908337b04d52ca Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 20 Mar 2014 21:17:24 -0700 Subject: [PATCH 080/343] Disable nbinom k 
fitting assert --- macroeco/distributions2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macroeco/distributions2.py b/macroeco/distributions2.py index b7b3b48..390d599 100644 --- a/macroeco/distributions2.py +++ b/macroeco/distributions2.py @@ -479,7 +479,7 @@ def fit2(self, data, k_range=(0.1,100,0.1)): parameter k. """ - assert len(data) > 20, "nbinom fit is not stable with <20 data points" + #assert len(data)>20, "nbinom fit is not stable with <20 data points" mu = np.mean(data) return mu, _nbinom_solve_k_from_mu(data, mu, k_range) From 30d607338ce9d5ba4b7fde9aa7816978646830dd Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 21 Mar 2014 10:42:05 -0700 Subject: [PATCH 081/343] Modify geom_uptrunc test to catch mixups in p vs 1-p --- macroeco/test_distributions2.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/macroeco/test_distributions2.py b/macroeco/test_distributions2.py index 2024527..adcdebf 100644 --- a/macroeco/test_distributions2.py +++ b/macroeco/test_distributions2.py @@ -45,8 +45,9 @@ class TestGeomUptrunc(TestCase): def test_pmf(self): # Expected values are regular geo cdf divided by cdf at b - vals = geom_uptrunc.pmf([0,1,2], 0.5, 2) - assert_array_almost_equal(vals, np.array([0.5,0.25,0.125])/0.875) + vals = geom_uptrunc.pmf([0,1,2], 0.25, 2) + assert_array_almost_equal(vals, + np.array([0.25,0.1875,0.140625])/0.578125) def test_cdf(self): # Expected values are regular geom cdf divided by cdf at b @@ -95,8 +96,6 @@ def test_translate_args_with_sum_of_pmf(self): assert_array_almost_equal(1,np.sum(geom_uptrunc.pmf(range(342),p1,b1))) p2, b2 = geom_uptrunc.translate_args(120, 200) # Arbitrary - print p2, b2 - print (geom_uptrunc.pmf(range(201),p2,b2)) assert_array_almost_equal(1,np.sum(geom_uptrunc.pmf(range(201),p2,b2))) def test_fit2(self): @@ -145,7 +144,6 @@ def test_fit2_with_manual_calc(self): x = np.array([6,17,14,12,8,10,4,9,3,12,4,2,12,8,14,16,9,10,8,5,6]) mu, k = nbinom.fit2(x, k_range=(0.01,10,0.01)) assert_array_almost_equal([mu, k], [9, 8.54], decimal=2) - class TestExpon(TestCase): From fdd831795c5fa689c681b8b10338b1e6c65f3c24 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 21 Mar 2014 10:42:36 -0700 Subject: [PATCH 082/343] Fix bug in expon_uptrunc fit2, was returning scale=1/lam not lam --- macroeco/distributions2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macroeco/distributions2.py b/macroeco/distributions2.py index 390d599..ccc3c32 100644 --- a/macroeco/distributions2.py +++ b/macroeco/distributions2.py @@ -646,7 +646,7 @@ def fit2(self, data, b=None): if not b: b = np.sum(data) expon = expon_gen(a=0.0, b=b) - return expon.fit(data, floc=0)[2], b + return 1/expon.fit(data, floc=0)[2], b def _rvs(self, lam, b): expon = expon_gen(a=0.0, b=b) From 1a21e7dcd282bdae3501096902bde61fe2879508 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 21 Mar 2014 10:43:22 -0700 Subject: [PATCH 083/343] Refactoring _analyze_empirical --- macroeco/main.py | 92 ++++++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 43 deletions(-) diff --git a/macroeco/main.py b/macroeco/main.py index 5e1c2be..4c3f149 100644 --- a/macroeco/main.py +++ b/macroeco/main.py @@ -143,34 +143,68 @@ def _analyze_empirical(options): elements are not used. """ - # TODO: (In empirical) Create result objects rather than strange lists of - # nested tuples. 
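A note on the expon_uptrunc fix just above: scipy parameterizes the exponential by scale = 1/lambda, so whatever fit() returns in the scale slot has to be inverted before being reported as the rate lam. A plain scipy.stats sketch of that convention (stock expon, not the truncated subclass):

import numpy as np
from scipy import stats

np.random.seed(0)
data = np.random.exponential(scale=2.0, size=1000)   # true rate lam = 0.5
loc, scale = stats.expon.fit(data, floc=0)            # scipy reports the scale, not the rate
lam = 1.0 / scale                                      # roughly 0.5 for this sample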
- + # If no metadata path is given or data path invalid, raise error metadata_path = os.path.normpath(os.path.join(options['param_dir'], options['metadata'])) if not os.path.isfile(metadata_path): raise IOError, "Path to metadata file %s is invalid." % metadata_path - # Create Patch object for this data + # Get analysis arguments patch = Patch(metadata_path) + options['cols'], options['splits'] = _get_cols_splits(options, patch) + args, kwargs = _get_args_kwargs(options) + + # Call metric function and return result + return eval("%s(patch, *args, **kwargs)" % options['metric']) + +def _get_cols_splits(options, patch): + """ + Notes + ----- + Always returns strings, even if dictionary or list is constructed here, to + ensure consistency with provided options. + + """ + + cols = {} + special_cols = ['spp_col', 'count_col', 'energy_col', 'mass_col'] + + # Cols may be given as option or individual col options may be options + if 'cols' in options.keys(): + cols = eval(options['cols']) # Must be string representing dict + else: + for col in special_cols: + cols[col] = options.get(col, None) + + # If col is still None, try to fall back to metadata + for col in special_cols: + if cols[col] is None: + cols[col] = patch.meta['Description'].get(col, None) + + # Splits may be given as option, else is set to None + if 'splits' in options.keys(): + splits = options['splits'] + else: + splits = None + + return str(cols), str(splits) - # Get cols and splits variable (req by all metrics) and add to options - options['cols'], options['splits'] = _get_cols_splits(options) + +def _get_args_kwargs(options): # Get names of args and kwargs to method specified by metric option exec ("arg_and_kwd_names, _, _, kw_defaults = " "inspect.getargspec(%s)" % options['metric']) - if kw_defaults: + if kw_defaults: # If there are kwargs arg_names = arg_and_kwd_names[1:-len(kw_defaults)] # Ignore patch kw_names = arg_and_kwd_names[-len(kw_defaults):] - else: - arg_names = arg_and_kwd_names[1:] + else: # If no kwargs + arg_names = arg_and_kwd_names[1:] # Ignore patch kw_names = [] - # Create list with vals for all args and dict with vals for all kwargs - # All required args must be in options - args = [] # Patch is always first argument + # Create list with vals for all args - all args must be in options + args = [] for arg_name in arg_names: try: exec 'args.append(eval("%s"))' % options[arg_name] @@ -180,46 +214,18 @@ def _analyze_empirical(options): raise ValueError, ("Value for required argument %s not provided" % arg_name) + # Create dict with vals for all kwargs - kwargs may be present or absent kwargs = {} for kw_name in kw_names: - if kw_name in options.keys(): + if kw_name in options.keys(): # If a value is given for this kwarg try: exec 'kwargs[kw_name]=eval("%s")' % options[kw_name] - except SyntaxError: # eval failing because option is a string + except SyntaxError: # eval failing because value is a string exec 'kwargs[kw_name]="%s"' % options[kw_name] except: raise ValueError, ("Value for optional argument %s is invalid" % kw_name) - # Call Patch method with appropriate args and return result - return eval("%s(patch, *args, **kwargs)" % options['metric']) - - -def _get_cols_splits(options): - """ - Notes - ----- - Always returns strings, even if dictionary or list is constructed here, to - ensure consistency with provided options. 
- - """ - - # Splits may be given as option, else is set to empty - if 'splits' in options.keys(): - splits = options['splits'] - else: - splits = None - - # Cols may be given as option or individual col options may be options - if 'cols' in options.keys(): - cols = options['cols'] - else: - cols = {} - for col in ['spp_col', 'count_col', 'energy_col', 'mass_col']: - cols[col] = options.get(col, None) - - return str(cols), str(splits) - def _analyze_models(options, emp_results): """ From 33eb6afefef469daa0f9bfd486de5e64c4245a8f Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Fri, 21 Mar 2014 15:28:14 -0700 Subject: [PATCH 084/343] Added lrt to compare --- macroeco/compare.py | 44 +++++++++++++++++++++++++++++++++++++--- macroeco/test_compare.py | 12 +++++++++++ 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/macroeco/compare.py b/macroeco/compare.py index 98d27dc..83c58ff 100644 --- a/macroeco/compare.py +++ b/macroeco/compare.py @@ -21,6 +21,7 @@ get_sum_of_squares get_r_squared get_chi_squared + get_lrt bin_data """ @@ -206,16 +207,53 @@ def get_ks_two_sample(): """ - pass + def get_ks_one_sample(): pass -def get_lrt(): - pass +def get_lrt(nll_null, nll_alt, df): + """ + This functions compares two nested models using the likelihood ratio + test. + + Parameters + ---------- + nll_null : float + The negative log-likelihood of the null model + nll_alt : float + The negative log-likelihood of the alternative model + df_list : int + the degrees of freedom calculated as (number of free parameters in + alternative model) - (number of free parameters in null model). + Alternatively, the number of additional parameters in the alternative + model. + + Returns + ------- + : tuple + (test_statistic, p-value) + + Notes + ----- + Interpretation: p-value < alpha suggests signficant evidence for your + alternative model + + The LRT only applies to nested models. The variable test_stat is known as + the G^2 statistic. The G-test uses the fact that -2log(Likelihood_null / + Likelihood_alt) is approximately chi-squared. This assumption breaks down + for small samples sizes. 
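The arithmetic is small enough to verify directly against the R lrtest() numbers used in test_get_lrt below; a standalone sketch, using chi2.sf (the modern equivalent of the chisqprob call in the body that follows):

from scipy import stats

nll_null, nll_alt, df = 158.0494, 139.806, 1     # values from the unit test below
g2 = 2 * (nll_null - nll_alt)                    # identical to -2 * (ll_null - ll_alt)
p_value = stats.chi2.sf(g2, df)                  # same value as stats.chisqprob(g2, df)
# g2 is 36.4868 and p_value is roughly 1.54e-09, matching R's lrtest()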
+ + """ + + # Calculate G^2 statistic + ll_null = nll_null * -1 + ll_alt = nll_alt * -1 + test_stat = -2 * (ll_null - ll_alt) + return (test_stat, stats.chisqprob(test_stat, df)) def get_bayes_factor(): pass diff --git a/macroeco/test_compare.py b/macroeco/test_compare.py index 7e29c47..73ae2d8 100644 --- a/macroeco/test_compare.py +++ b/macroeco/test_compare.py @@ -167,6 +167,18 @@ def test_bin_data(self): test_res = bin_data(data, max(data))[0] assert_array_equal(test_res, vegan) + def test_get_lrt(self): + + # Test against what the lrtest() R function returns + model1 = 158.0494 + model0 = 139.806 + R_chisquare = 36.4868 + R_p = 1.537e-09 + + pred_chi, pred_p = get_lrt(model1, model0, 1) + + assert_almost_equal(pred_chi, R_chisquare) + assert_almost_equal(pred_p, R_p) # # From 0eb51dd79ccdf7937f8caf043141f67027766080 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Fri, 21 Mar 2014 15:38:09 -0700 Subject: [PATCH 085/343] Added pass in ks_two_sample --- macroeco/compare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macroeco/compare.py b/macroeco/compare.py index 83c58ff..4cc3c3a 100644 --- a/macroeco/compare.py +++ b/macroeco/compare.py @@ -207,7 +207,7 @@ def get_ks_two_sample(): """ - + pass def get_ks_one_sample(): From 4cb478c30a13acfe7bdc854627017bb4c92f523d Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 21 Mar 2014 20:01:18 -0700 Subject: [PATCH 086/343] Make lines thicker and use color blind-safe colors --- macroeco/main.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/macroeco/main.py b/macroeco/main.py index 4c3f149..8e7e3b9 100644 --- a/macroeco/main.py +++ b/macroeco/main.py @@ -21,12 +21,15 @@ import configparser from pandas import DataFrame +from twiggy_setup import get_log + import matplotlib.pyplot as plt -from matplotlib.mlab import rec2csv, rec_append_fields from mpltools import style style.use('ggplot') +import matplotlib as mpl # Colorblind safe palette, colorbrewer 8 Paired +mpl.rcParams['axes.color_cycle'] = ['0072B2','D55E00','CC79A7','009E73', + 'E69F00','F0E442','56B4E9'] -from twiggy_setup import get_log from empirical import * from distributions2 import * from compare import * @@ -456,7 +459,7 @@ def _data_pred_dist(cidx, models, options, emp_results, mod_results): def calc_func(model, x, shapes): return eval("%s.cdf(x, *shapes)" % model) - plot_exec_str = "ax.step(x, emp, color='k')" + plot_exec_str = "ax.step(x, emp, color='k', lw=3)" _save_table_and_plot(cidx, models, options, mod_results, 'data_pred_cdf', x, emp_cdf, calc_func, plot_exec_str) @@ -512,7 +515,7 @@ def _save_table_and_plot(cidx, models, options, mod_results, name, x, emp, df_plt = df_plt.drop('empirical',1) width = x[1] - x[0] - ax = df_plt.plot() + ax = df_plt.plot(lw=3) exec plot_exec_str ax = _pad_plot_frame(ax) fig = ax.get_figure() From 6a981f5d7197a3abb8364143b539631c5bba92fd Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 21 Mar 2014 20:01:34 -0700 Subject: [PATCH 087/343] Make RAD plot log on y axis --- macroeco/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macroeco/main.py b/macroeco/main.py index 8e7e3b9..8758534 100644 --- a/macroeco/main.py +++ b/macroeco/main.py @@ -471,7 +471,7 @@ def calc_func(model, x, shapes): def calc_func(model, x, shapes): return eval("%s.ppf((x-0.5)/len(x), *shapes)" % model)[::-1] - plot_exec_str = "ax.scatter(x, emp, color='k')" + plot_exec_str = "ax.scatter(x, emp, color='k'); ax.set_yscale('log')" _save_table_and_plot(cidx, models, options, mod_results, 
'data_pred_rad', x, emp_rad, calc_func, plot_exec_str) From 13c2c4440e6f283375e1c1b2ac7efa563c440bfc Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 21 Mar 2014 20:02:00 -0700 Subject: [PATCH 088/343] Throw more meaningful error is no spp_col given --- macroeco/main.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/macroeco/main.py b/macroeco/main.py index 8758534..a6a4c9a 100644 --- a/macroeco/main.py +++ b/macroeco/main.py @@ -191,6 +191,10 @@ def _get_cols_splits(options, patch): else: splits = None + # Every metric requires a spp_col + if 'spp_col' not in cols.keys(): + raise ValueError, 'spp_col not specified' + return str(cols), str(splits) From ba82b6ee33572494cf303392393c56b7150b385d Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 21 Mar 2014 20:02:30 -0700 Subject: [PATCH 089/343] Fix return in _get_args_kwargs --- macroeco/main.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/macroeco/main.py b/macroeco/main.py index a6a4c9a..057e79e 100644 --- a/macroeco/main.py +++ b/macroeco/main.py @@ -233,6 +233,8 @@ def _get_args_kwargs(options): raise ValueError, ("Value for optional argument %s is invalid" % kw_name) + return args, kwargs + def _analyze_models(options, emp_results): """ From 64391b858a6596b2ecdb9512aabffac3a90f9fcd Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 21 Mar 2014 20:02:49 -0700 Subject: [PATCH 090/343] Change figures to vector (pdf) format --- macroeco/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macroeco/main.py b/macroeco/main.py index 057e79e..29a25ba 100644 --- a/macroeco/main.py +++ b/macroeco/main.py @@ -503,7 +503,7 @@ def _save_table_and_plot(cidx, models, options, mod_results, name, x, emp, calc_func, plot_exec_str): f_path = _get_file_path(cidx, options, '%s.csv' % name) - p_path = _get_file_path(cidx, options, '%s.png' % name) + p_path = _get_file_path(cidx, options, '%s.pdf' % name) df = DataFrame({'x': x}) From 827631b6d5bebf66610bf0e7bf794a565a8866a9 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 22 Mar 2014 10:05:22 -0700 Subject: [PATCH 091/343] Refactor _data_pred_dist so RAD can write entire result table --- macroeco/main.py | 62 +++++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/macroeco/main.py b/macroeco/main.py index 29a25ba..e84ea70 100644 --- a/macroeco/main.py +++ b/macroeco/main.py @@ -26,7 +26,7 @@ import matplotlib.pyplot as plt from mpltools import style style.use('ggplot') -import matplotlib as mpl # Colorblind safe palette, colorbrewer 8 Paired +import matplotlib as mpl # Colorblind safe palette mpl.rcParams['axes.color_cycle'] = ['0072B2','D55E00','CC79A7','009E73', 'E69F00','F0E442','56B4E9'] @@ -455,63 +455,65 @@ def _data_pred_dist(cidx, models, options, emp_results, mod_results): Also make plots for all three """ - emp_result = emp_results[cidx][1]['y'].values + emp_result = emp_results[cidx][1] n_vals = len(emp_result) - # CDF - # TODO: This goes up by integers to max value, can be too large - x, emp_cdf = get_empirical_cdf(emp_result) + # RAD + x = np.arange(n_vals) + 1 + df = emp_result.sort(columns='y', ascending=False) + df.rename(columns={'y': 'empirical'}, inplace=True) + df.insert(0, 'x', x) - def calc_func(model, x, shapes): - return eval("%s.cdf(x, *shapes)" % model) + def calc_func(model, df, shapes): + return eval("%s.ppf((df['x']-0.5)/len(df), *shapes)" % model)[::-1] - plot_exec_str = "ax.step(x, emp, color='k', lw=3)" + plot_exec_str="ax.scatter(df['x'], emp, 
color='k');ax.set_yscale('log')" - _save_table_and_plot(cidx, models, options, mod_results, 'data_pred_cdf', - x, emp_cdf, calc_func, plot_exec_str) + _save_table_and_plot(cidx, models, options, mod_results, 'data_pred_rad', + df, calc_func, plot_exec_str) - # RAD - x = np.arange(n_vals) + 1 - emp_rad = np.sort(emp_result)[::-1] + # CDF + # TODO: This goes up by integers to max value, can be too large + x, emp_cdf = get_empirical_cdf(emp_result['y'].values) + df = DataFrame({'x': x, 'empirical': emp_cdf}) - def calc_func(model, x, shapes): - return eval("%s.ppf((x-0.5)/len(x), *shapes)" % model)[::-1] + def calc_func(model, df, shapes): + return eval("%s.cdf(df['x'], *shapes)" % model) - plot_exec_str = "ax.scatter(x, emp, color='k'); ax.set_yscale('log')" + plot_exec_str = "ax.step(df['x'], emp, color='k', lw=3)" - _save_table_and_plot(cidx, models, options, mod_results, 'data_pred_rad', - x, emp_rad, calc_func, plot_exec_str) + _save_table_and_plot(cidx, models, options, mod_results, 'data_pred_cdf', + df, calc_func, plot_exec_str) # PDF/PMF hist_bins = 11 - emp_hist, edges = np.histogram(emp_result, hist_bins, normed=True) + emp_hist, edges = np.histogram(emp_result['y'].values, hist_bins, + normed=True) x = (np.array(edges[:-1]) + np.array(edges[1:])) / 2 + df = DataFrame({'x': x, 'empirical': emp_hist}) - def calc_func(model, x, shapes): + def calc_func(model, df, shapes): try: - return eval("%s.pmf(np.floor(x), *shapes)" % model) + return eval("%s.pmf(np.floor(df['x']), *shapes)" % model) except: - return eval("%s.pdf(x, *shapes)" % model) + return eval("%s.pdf(df['x'], *shapes)" % model) - plot_exec_str = "ax.bar(x-width/2, emp, width=width, color='gray')" + plot_exec_str = "ax.bar(df['x']-width/2, emp, width=width, color='gray')" _save_table_and_plot(cidx, models, options, mod_results, 'data_pred_pdf', - x, emp_hist, calc_func, plot_exec_str) + df, calc_func, plot_exec_str) -def _save_table_and_plot(cidx, models, options, mod_results, name, x, emp, +def _save_table_and_plot(cidx, models, options, mod_results, name, df, calc_func, plot_exec_str): f_path = _get_file_path(cidx, options, '%s.csv' % name) p_path = _get_file_path(cidx, options, '%s.pdf' % name) - - df = DataFrame({'x': x}) - df['empirical'] = emp for model in models: mod_result = mod_results[cidx][model] shapes = mod_result[0] - result = calc_func(model, x, shapes) + result = calc_func(model, df, shapes) df[model] = result df.to_csv(f_path, index=False, float_format='%.4f') # Table @@ -520,7 +522,7 @@ def _save_table_and_plot(cidx, models, options, mod_results, name, x, emp, emp = df_plt['empirical'] df_plt = df_plt.drop('empirical',1) - width = x[1] - x[0] + width = df['x'].values[1] - df['x'].values[0] ax = df_plt.plot(lw=3) exec plot_exec_str ax = _pad_plot_frame(ax) From f3c1eda674b7d0ef30f6472036cb4ffb6706ccf0 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 22 Mar 2014 10:34:13 -0700 Subject: [PATCH 092/343] Refactor twiggy_setup for clarity --- macroeco/twiggy_setup.py | 92 +++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 43 deletions(-) diff --git a/macroeco/twiggy_setup.py b/macroeco/twiggy_setup.py index 3418d5a..35bf8fd 100644 --- a/macroeco/twiggy_setup.py +++ b/macroeco/twiggy_setup.py @@ -1,60 +1,69 @@ +""" +Set up logging +""" + import twiggy import traceback import sys import os import threading as thread -# Output format for log file - remove traceback prefix -file_format = twiggy.formats.LineFormat(traceback_prefix='') - -# Output format for terminal logging - only text 
message part -class stdLineFormat(twiggy.formats.LineFormat): - def __call__(self, msg): - text = self.format_text(msg) - print "{text}".format(**locals()) - return "" -std_format = stdLineFormat(traceback_prefix='') - -# Logger setup - returns logger object -def get_log(log_dir='/Users/jkitzes/Desktop/', clear=False): - - # Get path to log file - must be writable (ie, not inside pyinstaller app) - log_path = os.path.join(log_dir,'log.txt') - # Delete log file if requested - if clear: - try: - os.remove(log_path) - except OSError: - pass - - # Set up outputs for file and stdout - file_output = twiggy.outputs.FileOutput(log_path, format=file_format) - std_output = twiggy.outputs.StreamOutput(format=std_format, - stream=sys.stdout) +def get_log(log_dir, clear=False): + """ + Set up and return logger object + """ - # Create emitters + # Get path to log file and clear if requested + log_file = os.path.join(log_dir,'log.txt') + if clear and os.path.isfile(log_file): + os.remove(log_file) + + # Get outputs and add emitters + file_output, std_output = _logger_outputs() twiggy.addEmitters(('file', twiggy.levels.DEBUG, None, file_output), ('stdout', twiggy.levels.INFO, None, std_output)) - # Declare logger for macroeco - # TODO: Once modules are in subdirs, change to __name__ to log module also + # Get logger + # TODO: Once modules are in subdirs, change to __name__ log = twiggy.log.name('meco') + # Log uncaught exceptions (must occur after log declared) + def log_uncaught(type1, value1, traceback1): + tb_list = traceback.format_exception(type1, value1, traceback1) + tb_str = ''.join(tb_list) + log.options(suppress_newlines=False).critical('\n'+tb_str) + sys.excepthook = log_uncaught + + # Make threads use sys.excepthook from parent process + _installThreadExcepthook() + return log -# Log uncaught exceptions -log = twiggy.log.name('meco') # If below called before log def elsewhere -def log_uncaught(type1, value1, traceback1): - tb_list = traceback.format_exception(type1, value1, traceback1) - tb_str = ''.join(tb_list) - log.options(suppress_newlines=False).critical('\n'+tb_str) -sys.excepthook = log_uncaught -# Use proper excepthook for threads also -def installThreadExcepthook(): +def _logger_outputs(): + + # To ensure that Macroeco Desktop captures stdout, we just print it + class stdLineFormat(twiggy.formats.LineFormat): + def __call__(self, msg): + text = self.format_text(msg) + print "{text}".format(**locals()) + return "" + + # Choose formats for file and stdout + file_format = twiggy.formats.LineFormat(traceback_prefix='') + std_format = stdLineFormat(traceback_prefix='') + + # Set up outputs for file and stdout and create emitters + file_output = twiggy.outputs.FileOutput(log_file, format=file_format) + std_output = twiggy.outputs.StreamOutput(format=std_format) + + return file_output, std_output + + +def _installThreadExcepthook(): """ - Workaround for sys.excepthook thread bug + Make threads use sys.excepthook from parent process http://bugs.python.org/issue1230540 """ init_old = thread.Thread.__init__ @@ -70,6 +79,3 @@ def run_with_except_hook(*args, **kw): sys.excepthook(*sys.exc_info()) self.run = run_with_except_hook thread.Thread.__init__ = init -installThreadExcepthook() - - From 3dbc1fde0bd7166d700ac9a40b418c31f4952997 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 22 Mar 2014 10:36:14 -0700 Subject: [PATCH 093/343] Rename twiggy_setup to more general misc --- macroeco/desktop.py | 2 +- macroeco/main.py | 2 +- macroeco/{twiggy_setup.py => misc.py} | 0 3 files changed, 2 
insertions(+), 2 deletions(-) rename macroeco/{twiggy_setup.py => misc.py} (100%) diff --git a/macroeco/desktop.py b/macroeco/desktop.py index 7ed1f3f..eaf0de2 100755 --- a/macroeco/desktop.py +++ b/macroeco/desktop.py @@ -16,7 +16,7 @@ import threading as thread import main -from twiggy_setup import get_log +from misc import get_log class RedirectText(object): def __init__(self,aWxTextCtrl): diff --git a/macroeco/main.py b/macroeco/main.py index e84ea70..6795647 100644 --- a/macroeco/main.py +++ b/macroeco/main.py @@ -21,7 +21,7 @@ import configparser from pandas import DataFrame -from twiggy_setup import get_log +from misc import get_log import matplotlib.pyplot as plt from mpltools import style diff --git a/macroeco/twiggy_setup.py b/macroeco/misc.py similarity index 100% rename from macroeco/twiggy_setup.py rename to macroeco/misc.py From 1416a931859a41d31af4fca26581297df103b26a Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 22 Mar 2014 16:00:09 -0700 Subject: [PATCH 094/343] Change log_file back to more accurate log_path --- macroeco/misc.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/macroeco/misc.py b/macroeco/misc.py index 35bf8fd..4737f40 100644 --- a/macroeco/misc.py +++ b/macroeco/misc.py @@ -15,12 +15,12 @@ def get_log(log_dir, clear=False): """ # Get path to log file and clear if requested - log_file = os.path.join(log_dir,'log.txt') - if clear and os.path.isfile(log_file): - os.remove(log_file) + log_path = os.path.join(log_dir,'log.txt') + if clear and os.path.isfile(log_path): + os.remove(log_path) # Get outputs and add emitters - file_output, std_output = _logger_outputs() + file_output, std_output = _logger_outputs(log_path) twiggy.addEmitters(('file', twiggy.levels.DEBUG, None, file_output), ('stdout', twiggy.levels.INFO, None, std_output)) @@ -41,7 +41,7 @@ def log_uncaught(type1, value1, traceback1): return log -def _logger_outputs(): +def _logger_outputs(log_path): # To ensure that Macroeco Desktop captures stdout, we just print it class stdLineFormat(twiggy.formats.LineFormat): @@ -55,7 +55,7 @@ def __call__(self, msg): std_format = stdLineFormat(traceback_prefix='') # Set up outputs for file and stdout and create emitters - file_output = twiggy.outputs.FileOutput(log_file, format=file_format) + file_output = twiggy.outputs.FileOutput(log_path, format=file_format) std_output = twiggy.outputs.StreamOutput(format=std_format) return file_output, std_output From ef9a346094d9a7837ac6ce5c9916c715fc61e6e2 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 22 Mar 2014 16:00:44 -0700 Subject: [PATCH 095/343] min and max in Patch table should come from metadata --- macroeco/empirical.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/macroeco/empirical.py b/macroeco/empirical.py index c9a85aa..f915d38 100644 --- a/macroeco/empirical.py +++ b/macroeco/empirical.py @@ -1214,8 +1214,8 @@ def _parse_splits(patch, splits): level_list = [col + '==' + str(x) + ';' for x in np.unique(patch.table[col])] else: - col_min = np.min(patch.table[col]) - col_max = np.max(patch.table[col]) + col_min = eval(patch.meta[col]['min']) + col_max = eval(patch.meta[col]['max']) step = (col_max - col_min) / eval(val) starts = np.arange(col_min, col_max, step) ends = starts + step From 35a34dc2bd97e9a2d8343f46758565f8642d6985 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 22 Mar 2014 16:29:35 -0700 Subject: [PATCH 096/343] Whitespace cleanup --- macroeco/distributions2.py | 4 ---- 1 file changed, 4 deletions(-) diff --git 
a/macroeco/distributions2.py b/macroeco/distributions2.py index ccc3c32..a18ba85 100644 --- a/macroeco/distributions2.py +++ b/macroeco/distributions2.py @@ -669,7 +669,3 @@ def _stats(self, lam, b): return expon.stats(lam) expon_uptrunc = expon_uptrunc_gen(a=0.0, name='expon_uptrunc', shapes='lam, b') - - - - From 9a3ede255acb52b56f3762b86c5394831e6778e5 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 22 Mar 2014 16:29:57 -0700 Subject: [PATCH 097/343] Major refactoring of main, support generic function-based analysis --- macroeco/main.py | 511 ++++++++++++++++++++++------------------------- 1 file changed, 239 insertions(+), 272 deletions(-) diff --git a/macroeco/main.py b/macroeco/main.py index 6795647..a1dcb17 100644 --- a/macroeco/main.py +++ b/macroeco/main.py @@ -15,13 +15,14 @@ """ from __future__ import division +import sys import os import shutil import inspect import configparser -from pandas import DataFrame -from misc import get_log +import numpy as np +import pandas as pd import matplotlib.pyplot as plt from mpltools import style @@ -30,18 +31,10 @@ mpl.rcParams['axes.color_cycle'] = ['0072B2','D55E00','CC79A7','009E73', 'E69F00','F0E442','56B4E9'] -from empirical import * -from distributions2 import * -from compare import * - - -# Dictionary with keys for allowable metrics and func type -metric_types = { - 'sad': 'dist', - 'sar': 'curve', - 'ear': 'curve', - 'ssad': 'dist', -} +from misc import get_log +import empirical as emp +import distributions2 as mod +import compare as comp def main(param_path='parameters.txt'): @@ -55,8 +48,7 @@ def main(param_path='parameters.txt'): """ - # Confirm file is present and extract dir name - # TODO: Because of log catch in twiggy_setup, this doesn't print anything + # Confirm file is present and extract dir if not os.path.isfile(param_path): raise IOError, "Parameter file not found at %s" % param_path param_dir = os.path.dirname(param_path) @@ -67,7 +59,10 @@ def main(param_path='parameters.txt'): # Read parameter file into params object params = configparser.ConfigParser() - params.read(param_path) + try: + params.read(param_path) + except: + raise ValueError, "Parameter file is invalid" # Do analysis for each run with options dict (params + addl options) run_names = params.sections() @@ -76,34 +71,10 @@ def main(param_path='parameters.txt'): options = dict(params[run_name]) options['param_dir'] = os.path.abspath(param_dir) options['run_dir'] = os.path.join(param_dir, run_name) - options['metric_type'] = _check_metric(options) _do_analysis(options) log.info('Finished analysis successfully') -def _check_metric(options): - """ - Checks if metric is in options list and returns string for metric type. - - Parameters - ---------- - options : dict - Option names and values for analysis - - Returns - ------- - str - 'dist' for distribution, 'curve' for curve type, None if no metric is - specified. - """ - if not 'metric' in options: - return None - try: - return metric_types[options['metric']] - except Exception: - raise NotImplementedError, ("No analysis for metric %s is currently " - "possible." % options['metric']) - def _do_analysis(options): """ Do analysis for a single run, as specified by options. 
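The core move of this refactor shows up in the hunks that follow: the 'analysis' option names a function, _function_location decides whether that name lives in the empirical (emp) or distributions2 (mod) module, and the call is then assembled generically from the remaining options. A toy version of the lookup step, with standard-library modules standing in for emp and mod (names here are illustrative only):

import inspect
import math
import random

def locate_function(func_name, candidates):
    # candidates is an ordered list of (label, module) pairs
    for label, module in candidates:
        if func_name in [name for name, _ in inspect.getmembers(module)]:
            return label
    raise ValueError("No analysis of type '%s' is available" % func_name)

locate_function('sqrt', [('emp', math), ('mod', random)])     # -> 'emp'
locate_function('shuffle', [('emp', math), ('mod', random)])  # -> 'mod'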
@@ -115,22 +86,35 @@ def _do_analysis(options): """ - if 'metric' in options: - emp_results = _analyze_empirical(options) - else: - emp_results = None + module = _function_location(options) + core_results = _call_analysis_function(options, module) - if 'models' in options: - mod_results = _analyze_models(options, emp_results) + if module == 'emp' and ('models' in options.keys()): + fit_results = _fit_models(options, core_results) else: - mod_results = None + fit_results = None - _save_results(options, emp_results, mod_results) + _save_results(options, module, core_results, fit_results) -def _analyze_empirical(options): +def _function_location(options): + # TODO: Add check for spec module + func_name = options['analysis'].split('.')[0] # Ignore method if present + emp_funcs = [x[0] for x in inspect.getmembers(emp)] + mod_funcs = [x[0] for x in inspect.getmembers(mod)] + if func_name in emp_funcs: + module = 'emp' + elif func_name in mod_funcs: + module = 'mod' + else: + raise ValueError, ("No analysis of type '%s' is available" % + options['analysis']) + return module + + +def _call_analysis_function(options, module): """ - Perform empirical analysis of metric on data set + Call function and get return, using inputs from options Parameters ---------- @@ -139,29 +123,81 @@ def _analyze_empirical(options): Returns ------- - list of tuples - Each tuple corresponds to a combination (see XXX), with first element - of the tuple giving a dictionary describing the combination and the - second element giving the result of the analysis. Any additional - elements are not used. + tuple or list of tuples + First element of the tuple gives a string describing the result and the + second element giving the result of the analysis as a dataframe. + Functions in the empirical module return a list of tuples, where each + tuple corresponds to a split. All other functions return a single + tuple. 
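Most of the machinery in _get_args_kwargs below boils down to coercing option strings into Python values: anything that evaluates cleanly (numbers, tuples, dicts) is used as such, and anything that does not is kept as a plain string. A simplified stand-in for that pattern (the production code also special-cases the patch, self, and k arguments and raises on genuinely missing values):

def coerce_option(value):
    # hypothetical helper; the real code does this inline per argument
    try:
        return eval(value)
    except (NameError, SyntaxError):
        return value

coerce_option("2")                    # -> 2
coerce_option("(0.1, 100, 0.1)")      # -> (0.1, 100, 0.1)
coerce_option("{'spp_col': 'spp'}")   # -> {'spp_col': 'spp'}
coerce_option("x:2; y:2")             # -> 'x:2; y:2', kept as a string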
+ + """ + + args, kwargs = _get_args_kwargs(options, module) + return eval("%s.%s(*args, **kwargs)" % (module, options['analysis'])) + + +def _get_args_kwargs(options, module): + """ + Given an analysis, options, and a module, extract args and kwargs + """ + + if module == 'emp': + options = _emp_extra_options(options) + arg_names, kw_names = _arg_kwarg_lists(options, module) + + # Create list of values for arg_names + args = [] + for arg_name in arg_names: + + if arg_name == 'patch': # For patch arg, append actual patch obj + args.append(options['patch']) + continue + if arg_name == 'self': # Ignore self from class methods + continue + if arg_name == 'k': # scipy dists use k and x, we always use x + arg_name = 'x' + + try: + exec 'args.append(eval("%s"))' % options[arg_name] + except SyntaxError: # eval failing because option is a string + args.append(options[arg_name]) + except: + raise ValueError, ("Value for required argument %s not provided" + % arg_name) + + # Create dict with vals for kw_names + kwargs = {} + for kw_name in kw_names: + if kw_name in options.keys(): # If a value is given for this kwarg + try: + exec 'kwargs[kw_name] = eval("%s")' % options[kw_name] + except SyntaxError: # eval failing because value is a string + kwargs[kw_name] = options[kw_name] + except: + raise ValueError, ("Value for optional argument %s is invalid" + % kw_name) + + return args, kwargs + +def _emp_extra_options(options): + """ + Get special options patch, cols, and splits if analysis in emp module """ - # If no metadata path is given or data path invalid, raise error metadata_path = os.path.normpath(os.path.join(options['param_dir'], options['metadata'])) if not os.path.isfile(metadata_path): - raise IOError, "Path to metadata file %s is invalid." % metadata_path + raise IOError, ("Path to metadata file %s is invalid." 
% + metadata_path) - # Get analysis arguments - patch = Patch(metadata_path) - options['cols'], options['splits'] = _get_cols_splits(options, patch) - args, kwargs = _get_args_kwargs(options) + options['patch'] = emp.Patch(metadata_path) + options['cols'], options['splits'] = _get_cols_splits(options) - # Call metric function and return result - return eval("%s(patch, *args, **kwargs)" % options['metric']) + return options -def _get_cols_splits(options, patch): + +def _get_cols_splits(options): """ Notes ----- @@ -183,7 +219,7 @@ def _get_cols_splits(options, patch): # If col is still None, try to fall back to metadata for col in special_cols: if cols[col] is None: - cols[col] = patch.meta['Description'].get(col, None) + cols[col] = options['patch'].meta['Description'].get(col, None) # Splits may be given as option, else is set to None if 'splits' in options.keys(): @@ -198,158 +234,95 @@ def _get_cols_splits(options, patch): return str(cols), str(splits) -def _get_args_kwargs(options): +def _arg_kwarg_lists(options, module): - # Get names of args and kwargs to method specified by metric option + # Get names of args and kwargs to method specified by analysis option exec ("arg_and_kwd_names, _, _, kw_defaults = " - "inspect.getargspec(%s)" % options['metric']) + "inspect.getargspec(%s.%s)" % (module, options['analysis'])) if kw_defaults: # If there are kwargs - arg_names = arg_and_kwd_names[1:-len(kw_defaults)] # Ignore patch + arg_names = arg_and_kwd_names[:-len(kw_defaults)] kw_names = arg_and_kwd_names[-len(kw_defaults):] else: # If no kwargs - arg_names = arg_and_kwd_names[1:] # Ignore patch + arg_names = arg_and_kwd_names kw_names = [] - # Create list with vals for all args - all args must be in options - args = [] - for arg_name in arg_names: - try: - exec 'args.append(eval("%s"))' % options[arg_name] - except SyntaxError: # eval failing because option is a string - exec 'args.append("%s")' % options[arg_name] - except: - raise ValueError, ("Value for required argument %s not provided" - % arg_name) - - # Create dict with vals for all kwargs - kwargs may be present or absent - kwargs = {} - for kw_name in kw_names: - if kw_name in options.keys(): # If a value is given for this kwarg - try: - exec 'kwargs[kw_name]=eval("%s")' % options[kw_name] - except SyntaxError: # eval failing because value is a string - exec 'kwargs[kw_name]="%s"' % options[kw_name] - except: - raise ValueError, ("Value for optional argument %s is invalid" - % kw_name) - - return args, kwargs - - -def _analyze_models(options, emp_results): - """ - Perform theoretical analysis based on empirical data or options - - Parameters - ---------- - options : dict - Option names and values for analysis - emp_results : list of tuples - Output of method of `empirical.Patch`, or None if no data given - - Returns - ------- - list of tuples - Each tuple corresponds to a combination in emp_result, with one element - in each tuple for the result of each model comparison. The result - object is another tuple of fitted parameters (tuple), values (array), - comparison statistic names (list), and comparison statistic values - (list). 
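For orientation, the comparison statistic wired in further down is AIC (see _get_comparison_statistic and comp.get_AIC). The underlying arithmetic is the standard 2k minus twice the log-likelihood; sketched here with a stock scipy Poisson fit rather than get_AIC's actual signature:

import numpy as np
from scipy import stats

data = np.array([2, 1, 1, 5, 3, 1, 1, 2, 4, 1])
mu_hat = data.mean()                              # Poisson MLE, for illustration only
loglik = stats.poisson.logpmf(data, mu_hat).sum()
aic = 2 * 1 - 2 * loglik                          # one free parameter, so k = 1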
- - """ - - if emp_results: - mod_results = _analyze_models_from_data(options, emp_results) - else: - mod_results = _analyze_models_from_options(options) + # Inspection for rv classes doesn't work since it uses args internally + # Unless method is translate_args or fit2, appens shapes to args + try: + obj_meth = options['analysis'].split('.') + if obj_meth[1] not in ['fit2', 'translate_args']: + arg_names += eval(module+'.'+obj_meth[0]+'.'+"shapes.split(',')") + except: + pass - return mod_results + return arg_names, kw_names -def _analyze_models_from_data(options, emp_results): +def _fit_models(options, core_results): """ - Perform model analysis based on empirical data + Fit models to empirical result from a function in emp module Parameters ---------- options : dict Option names and values for analysis - emp_results : list of tuples - Output of method of `empirical.Patch` + core_results : list of tuples + Output of function in emp Returns ------- list of dicts - Each dict in the list corresponds to the similarly indexed combination - in emp_result. Dicts have a key for each given model name, with values - that are a four element list of fitted parameters (tuple), values - (array), comparison statistic names (tuple), and comparison statistic - values (tuple). + Each element in list corresponds to a split. The dict has a key for + each model given in options, and the value is a list of fitted + parameters (tuple), values (array), comparison statistic names (list), + and comparison statistic values (list). + + Notes + ----- + To determine if the empirical result refers to a curve or a distribution, + the result dataframe is inspected for a column 'x', which indicates a + curve. """ - # Get list of model names models = options['models'].replace(' ', '').split(';') - # Fit theories to all emp_results # TODO: Make work for 2D results, i.e., curves, comm_sep, o_ring - # TODO: Make work for curves in general - output_all = [] - for emp_result in emp_results: - output_emp_result = {} + # TODO: Make work for curves in general (check if 'x' present in core_res) + extra_results = [] + for core_result in core_results: # Each split + extra_result = {} for model in models: - data = emp_result[1]['y'].values + data = core_result[1]['y'].values fits = _get_fits(data, model) values = _get_values(data, model, fits) stat_names, stats = _get_comparison_statistic(values, fits) - output_emp_result[model] = [fits, values, stat_names, stats] - output_all.append(output_emp_result) + extra_result[model] = [fits, values, stat_names, stats] + extra_results.append(extra_result) - return output_all - - -def _analyze_models_from_options(options): - """ - Perform model analysis based on options - - Parameters - ---------- - options : dict - Option names and values for analysis - - Returns - ------- - list of tuples - List of length 1 containing 1 tuple of length 1 (parallel structure to - _analyze_models_with_data). Content of that tuple is fitted parameters - (tuple). 
- - """ - raise NotImplementedError, "Models cannot be analyzed without data" - - #_get_fits_from_options should call model.translate_args (if exists) + return extra_results def _get_fits(data, model): - return eval("%s.fit2(data)" % model) + return eval("mod.%s.fit2(data)" % model) def _get_values(data, model, fits): try: - values = eval("%s.pdf(data, *fits)" % model) + values = eval("mod.%s.pdf(data, *fits)" % model) except AttributeError: - values = eval("%s.pmf(data, *fits)" % model) + values = eval("mod.%s.pmf(data, *fits)" % model) except: pass return values def _get_comparison_statistic(data, fits): - return ['AIC'], [get_AIC(data, fits)] + return ['AIC'], [comp.get_AIC(data, fits)] -def _save_results(options, emp_results, mod_results): +def _save_results(options, module, core_results, fit_results): """ Save results of analysis as tables and figures @@ -357,10 +330,12 @@ def _save_results(options, emp_results, mod_results): ---------- options : dict Option names and values for analysis - emp_results : list - Results of empirical metric analysis from _analyze_empirical - mod_results : list - Results of theoretical metric analysis from _analyze_theoretical + module : str + Module that contained function used to generate core_results + core_results : list, dataframe, or array + Results of main analysis + fit_results : list + Results of comparing emp analysis to models, None if not applicable """ @@ -368,151 +343,169 @@ def _save_results(options, emp_results, mod_results): shutil.rmtree(options['run_dir'], ignore_errors=True) os.makedirs(options['run_dir']) - # Write outputs depending on pres/abs of emp and mod and dist/curve metric - _write_split_index_file(options, emp_results) - _write_output(options, emp_results, mod_results) + # Write core results + _write_core_tables(options, module, core_results) + + # Write additional results if analysis from emp + if module == 'emp': + _write_split_index_file(options, core_results) + + if fit_results: # If models given + for i, core_result in enumerate(core_results): + models = options['models'].replace(' ','').split(';') + _write_fitted_params(i, models, options, fit_results) + _write_test_statistics(i, models, options, fit_results) + _write_comparison_plots_tables(i, models, options, + core_results, fit_results) -def _write_split_index_file(options, emp_results): +def _write_split_index_file(options, core_results): """ Write table giving index of splits, giving number and combination """ - - if not emp_results: - return None f_path = os.path.join(options['run_dir'], '_split_index.csv') with open(f_path, 'a') as f: - for i,emp_result in enumerate(emp_results): - f.write("%i,%s\n" % (i+1, str(emp_result[0]))) + for i, core_result in enumerate(core_results): + f.write("%i,%s\n" % (i+1, str(core_result[0]))) -def _write_output(options, emp_results, mod_results): +def _write_core_tables(options, module, core_results): """ - Three groups of output - - Fitted params (always if there is a model) - - Data and pred (always if there is data, although no pred if no models) - - Test statistis (only if both data and model) + Notes + ----- + Depending on function that was called for analysis, core_results may be a + list of tuples (empirical), a dataframe, an array, or a single value. + + For the list of tuples from empirical, the second element of each tuple is + the raw result, and we write them all with the appropriate prefix. For + dataframes, we write them. For arrays or single values, we convert to data + frames and write them. 
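The array-or-scalar branch below leans on np.atleast_1d so that a bare float and a 1D result both become a one-column frame before being written; a quick illustration of that conversion:

import numpy as np
import pandas as pd

for core_result in (3.14, np.array([1.0, 2.0, 3.0])):
    df = pd.DataFrame({'y': np.atleast_1d(core_result)})
    df.to_csv('core_result.csv', index=False, float_format='%.4f')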
+ """ - # Get combinations from either emp or mod - if both exist must be same - try: - n_splits = len(emp_results) - except: - n_splits = len(mod_results) + table_name = 'core_result.csv' + single_file_path = os.path.join(options['run_dir'], table_name) - # Get list of names of models - try: - models = options['models'].replace(' ','').split(';') - except: - models = None + if module == 'emp': # List of tuples + for i, core_result in enumerate(core_results): + file_path = _get_file_path(i, options, table_name) + core_result[1].to_csv(file_path, index=False, float_format='%.4f') + + elif type(core_results) == type(pd.DataFrame()): # DataFrame + core_results.to_csv(single_file_path, index=False, float_format='%.4f') + + else: # Array or single value (atleast_1d corrects for unsized array) + df = pd.DataFrame({'y': np.atleast_1d(core_results)}) + df.to_csv(single_file_path, index=False, float_format='%.4f') - # Loop through all combinations - for cidx in range(n_splits): - if mod_results: - _write_fitted_params(cidx, models, options, mod_results) - if emp_results: - _write_and_plot_data_pred(cidx, models, options, emp_results, - mod_results) - if mod_results and emp_results: - _write_test_statistics(cidx, models, options, mod_results) + +def _get_file_path(spid, options, file_name): + return os.path.join(options['run_dir'], + '%i_%s' % (spid+1, file_name)) -def _write_fitted_params(cidx, models, options, mod_results): +def _write_fitted_params(spid, models, options, fit_results): - f = open(_get_file_path(cidx, options, "fitted_params.csv"), 'w') + f = open(_get_file_path(spid, options, 'fitted_params.csv'), 'w') f.write("Model, Fit Parameters\n") for model in models: - mod_result = mod_results[cidx][model] - mod_fits = str(mod_result[0])[1:-1] # Drop parens around tuple + fit_result = fit_results[spid][model] + mod_fits = str(fit_result[0])[1:-1] # Drop parens around tuple f.write("%s,%s\n" % (model, mod_fits)) f.close() -def _write_and_plot_data_pred(cidx, models, options, emp_results, mod_results): - """ - For distributions, will write and plot three kinds of comparisons - - pdf/pmf vs histogram - - cdf vs emp cdf - - rad vs rad +def _write_test_statistics(spid, models, options, fit_results): + # TODO: Add delta test statistics columns - For curves, we'll only do data vs pred (note will have x and y values) - """ + f = open(_get_file_path(spid, options, 'test_statistics.csv'), 'w') + + # Gets stat name list from any element of result dict - same for all + stat_names_list = next(fit_results[spid].itervalues())[2] + stat_names_str = str(stat_names_list)[1:-1].strip("'") - if options['metric_type'] == 'dist': - _data_pred_dist(cidx, models, options, emp_results, mod_results) - elif options['metric_type'] == 'curve': - _data_pred_curve(cidx, models, options, emp_results, mod_results) + f.write("Theory, %s\n" % stat_names_str) + for model in models: + fit_result = fit_results[spid][model] + fit_stats = str(fit_result[3])[1:-1] + f.write("%s,%s\n" % (model, fit_stats)) + f.close() -def _data_pred_dist(cidx, models, options, emp_results, mod_results): + +def _write_comparison_plots_tables(spid, models, options, core_results, + fit_results): """ - These tables have column for data and each model. + Notes + ----- + Only applies to analysis using functions from empirical in which models are + also given. 
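For context on the rank-abundance block in the body below: the predicted RAD is the fitted model's ppf evaluated at the rank midpoints (x - 0.5) / S and then reversed to descending order. A sketch with scipy's stock geometric standing in for a macroeco model (crude moment fit, illustration only):

import numpy as np
from scipy import stats

abund = np.array([50, 20, 10, 5, 3, 2, 1, 1])
S = len(abund)
x = np.arange(S) + 1
emp_rad = np.sort(abund)[::-1]
p = 1.0 / abund.mean()                               # rough fit for the example
pred_rad = stats.geom.ppf((x - 0.5) / S, p)[::-1]    # descending predicted abundances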
+ - pdf/pmf vs histogram - cdf vs emp cdf - rad vs rad - Also make plots for all three """ - emp_result = emp_results[cidx][1] - n_vals = len(emp_result) + core_result = core_results[spid][1] + n_vals = len(core_result) # RAD x = np.arange(n_vals) + 1 - df = emp_result.sort(columns='y', ascending=False) + df = core_result.sort(columns='y', ascending=False) df.rename(columns={'y': 'empirical'}, inplace=True) df.insert(0, 'x', x) def calc_func(model, df, shapes): - return eval("%s.ppf((df['x']-0.5)/len(df), *shapes)" % model)[::-1] + return eval("mod.%s.ppf((df['x']-0.5)/len(df), *shapes)" % model)[::-1] plot_exec_str="ax.scatter(df['x'], emp, color='k');ax.set_yscale('log')" - _save_table_and_plot(cidx, models, options, mod_results, 'data_pred_rad', + _save_table_and_plot(spid, models, options, fit_results, 'data_pred_rad', df, calc_func, plot_exec_str) # CDF # TODO: This goes up by integers to max value, can be too large - x, emp_cdf = get_empirical_cdf(emp_result['y'].values) - df = DataFrame({'x': x, 'empirical': emp_cdf}) + x, emp_cdf = comp.get_empirical_cdf(core_result['y'].values) + df = pd.DataFrame({'x': x, 'empirical': emp_cdf}) def calc_func(model, df, shapes): - return eval("%s.cdf(df['x'], *shapes)" % model) + return eval("mod.%s.cdf(df['x'], *shapes)" % model) - plot_exec_str = "ax.step(df['x'], emp, color='k', lw=3)" + plot_exec_str = "ax.step(df['x'], emp, color='k', lw=3);ax.set_ylim(top=1)" - _save_table_and_plot(cidx, models, options, mod_results, 'data_pred_cdf', + _save_table_and_plot(spid, models, options, fit_results, 'data_pred_cdf', df, calc_func, plot_exec_str) # PDF/PMF hist_bins = 11 - emp_hist, edges = np.histogram(emp_result['y'].values, hist_bins, + emp_hist, edges = np.histogram(core_result['y'].values, hist_bins, normed=True) x = (np.array(edges[:-1]) + np.array(edges[1:])) / 2 - df = DataFrame({'x': x, 'empirical': emp_hist}) + df = pd.DataFrame({'x': x, 'empirical': emp_hist}) def calc_func(model, df, shapes): try: - return eval("%s.pmf(np.floor(df['x']), *shapes)" % model) + return eval("mod.%s.pmf(np.floor(df['x']), *shapes)" % model) except: return eval("%s.pdf(df['x'], *shapes)" % model) plot_exec_str = "ax.bar(df['x']-width/2, emp, width=width, color='gray')" - _save_table_and_plot(cidx, models, options, mod_results, 'data_pred_pdf', + _save_table_and_plot(spid, models, options, fit_results, 'data_pred_pdf', df, calc_func, plot_exec_str) -def _save_table_and_plot(cidx, models, options, mod_results, name, df, +def _save_table_and_plot(spid, models, options, fit_results, name, df, calc_func, plot_exec_str): - f_path = _get_file_path(cidx, options, '%s.csv' % name) - p_path = _get_file_path(cidx, options, '%s.pdf' % name) + f_path = _get_file_path(spid, options, '%s.csv' % name) + p_path = _get_file_path(spid, options, '%s.pdf' % name) for model in models: - mod_result = mod_results[cidx][model] - shapes = mod_result[0] + fit_result = fit_results[spid][model] + shapes = fit_result[0] result = calc_func(model, df, shapes) df[model] = result @@ -531,6 +524,7 @@ def _save_table_and_plot(cidx, models, options, mod_results, name, df, plt.close('all') + def _pad_plot_frame(ax, pad=0.01): """ Provides padding on sides of frame equal to pad fraction of plot @@ -550,32 +544,5 @@ def _pad_plot_frame(ax, pad=0.01): return ax -def _data_pred_curve(cidx, models, options, emp_results, mod_results): - raise NotImplementedError, "Data and curve comparison not implemented" - - -def _write_test_statistics(cidx, models, options, mod_results): - # TODO: Add delta test 
statistics columns - - f = open(_get_file_path(cidx, options, "test_statistics.csv"), 'w') - - # Gets stat name list from any element of result dict - same for all models - stat_names_list = next(mod_results[cidx].itervalues())[2] - stat_names_str = str(stat_names_list)[1:-1].strip("'") - - f.write("Theory, %s\n" % stat_names_str) - - for model in models: - mod_result = mod_results[cidx][model] - mod_stats = str(mod_result[3])[1:-1] - f.write("%s,%s\n" % (model, mod_stats)) - f.close() - - -def _get_file_path(cidx, options, file_name): - return os.path.join(options['run_dir'], - '%i_%s' % (cidx+1, file_name)) - - if __name__ == '__main__': main(sys.argv[1]) From 4c565f353a4c1abc0c7bc88710e162cecf3addb8 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 22 Mar 2014 16:35:54 -0700 Subject: [PATCH 098/343] Move docstring inherit decorator to misc --- macroeco/distributions2.py | 24 +----------------------- macroeco/misc.py | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/macroeco/distributions2.py b/macroeco/distributions2.py index a18ba85..1aa8426 100644 --- a/macroeco/distributions2.py +++ b/macroeco/distributions2.py @@ -48,29 +48,7 @@ import scipy.optimize as optim import scipy.special as special -def inherit_docstring_from(cls): - """ - This decorator modifies the decorated function's docstring by - replacing occurrences of '%(super)s' with the docstring of the - method of the same name from the class `cls`. - - If the decorated method has no docstring, it is simply given the - docstring of `cls`s method. - - From scipy.misc.doccer - - """ - def _doc(func): - cls_docstring = getattr(cls, func.__name__).__doc__ - func_docstring = func.__doc__ - if func_docstring is None: - func.__doc__ = cls_docstring - else: - new_docstring = func_docstring % dict(super=cls_docstring) - func.__doc__ = new_docstring - return func - return _doc - +from misc import inherit_docstring_from _doc_default_callparams = \ """ diff --git a/macroeco/misc.py b/macroeco/misc.py index 4737f40..ad85d29 100644 --- a/macroeco/misc.py +++ b/macroeco/misc.py @@ -79,3 +79,27 @@ def run_with_except_hook(*args, **kw): sys.excepthook(*sys.exc_info()) self.run = run_with_except_hook thread.Thread.__init__ = init + + +def inherit_docstring_from(cls): + """ + This decorator modifies the decorated function's docstring by + replacing occurrences of '%(super)s' with the docstring of the + method of the same name from the class `cls`. + + If the decorated method has no docstring, it is simply given the + docstring of `cls`s method. 
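For readers of the moved code, the decorator's contract is simple string substitution: the parent method's docstring is spliced wherever '%(super)s' appears in the child's. A small hypothetical usage (assuming the decorator is importable from macroeco.misc once this move lands):

from macroeco.misc import inherit_docstring_from

class Base(object):
    def pmf(self, x):
        "Probability mass function."

class Truncated(Base):
    @inherit_docstring_from(Base)
    def pmf(self, x):
        "%(super)s Truncated at an upper limit b."

# Truncated.pmf.__doc__ is "Probability mass function. Truncated at an upper limit b."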
+ + From scipy.misc.doccer + + """ + def _doc(func): + cls_docstring = getattr(cls, func.__name__).__doc__ + func_docstring = func.__doc__ + if func_docstring is None: + func.__doc__ = cls_docstring + else: + new_docstring = func_docstring % dict(super=cls_docstring) + func.__doc__ = new_docstring + return func + return _doc From bca0ce65ee7bfc45427ecdc29a4175e657fcfdbd Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 22 Mar 2014 16:44:35 -0700 Subject: [PATCH 099/343] Add twiggy to requirements --- doc/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/requirements.txt b/doc/requirements.txt index ed44683..d740b97 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -2,7 +2,8 @@ scipy numpy matplotlib pandas -numpydoc shapely configparser +twiggy +numpydoc macroeco From 93d8b385b5ae9a01c0fb90755f3fdffbdeb04e79 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 13:44:01 -0700 Subject: [PATCH 100/343] Move doctoring substitution decorator to misc --- macroeco/empirical.py | 7 +------ macroeco/misc.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/macroeco/empirical.py b/macroeco/empirical.py index f915d38..2f9d244 100644 --- a/macroeco/empirical.py +++ b/macroeco/empirical.py @@ -40,12 +40,7 @@ import scipy.spatial.distance as dist #import shapely.geometry as geo - -def doc_sub(*sub): - def dec(obj): - obj.__doc__ = obj.__doc__.format(*sub) - return obj - return dec +from misc import doc_sub metric_params = \ """patch : Patch obj diff --git a/macroeco/misc.py b/macroeco/misc.py index ad85d29..1782ac4 100644 --- a/macroeco/misc.py +++ b/macroeco/misc.py @@ -103,3 +103,21 @@ def _doc(func): func.__doc__ = new_docstring return func return _doc + + +def doc_sub(*sub): + """ + Decorator for performing substitutions in docstrings. + + Using @doc_sub(some_note, other_note) on a function with {0} and {1} in the + docstring will substitute the contents of some_note and other_note for {0} + and {1}, respectively. + + Decorator appears to work properly both with IPython help (tab completion + and ?) and with Sphinx. 
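In practice (empirical.py already does this with the metric_params block shown in the diff above) a chunk of shared parameter documentation is written once and spliced into each metric's docstring; the implementation that follows is a one-line .format call. A minimal hypothetical use:

from macroeco.misc import doc_sub

patch_doc = """patch : Patch obj
    Patch object containing data for analysis"""

@doc_sub(patch_doc)
def sad(patch):
    """Species abundance distribution.

    Parameters
    ----------
    {0}
    """

# sad.__doc__ now contains the patch_doc text in place of {0}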
+ + """ + def dec(obj): + obj.__doc__ = obj.__doc__.format(*sub) + return obj + return dec From 3f6b4ae4842699cae5c2c70e26f042a3d8c222f7 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 13:44:51 -0700 Subject: [PATCH 101/343] New log file line format, also use local time instead of GMT --- macroeco/misc.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/macroeco/misc.py b/macroeco/misc.py index 1782ac4..2f1a5ff 100644 --- a/macroeco/misc.py +++ b/macroeco/misc.py @@ -6,6 +6,7 @@ import traceback import sys import os +import time import threading as thread @@ -43,16 +44,25 @@ def log_uncaught(type1, value1, traceback1): def _logger_outputs(log_path): - # To ensure that Macroeco Desktop captures stdout, we just print it + # std_format - to ensure Macroeco Desktop shows logging, we just print class stdLineFormat(twiggy.formats.LineFormat): def __call__(self, msg): text = self.format_text(msg) print "{text}".format(**locals()) return "" - - # Choose formats for file and stdout - file_format = twiggy.formats.LineFormat(traceback_prefix='') std_format = stdLineFormat(traceback_prefix='') + + # file_format - customized to show local time, etc + conversion = twiggy.lib.converter.ConversionTable() + conversion.add("time", _logger_better_time, "[{1}]".format) + conversion.add("name", str, "{{{1}}}".format) + conversion.add("level", str, "{1}".format) + conversion.aggregate = ' '.join + conversion.genericValue = str + conversion.genericItem = "{0}={1}".format + + file_format = twiggy.formats.LineFormat(traceback_prefix='', separator=' ', + conversion=conversion) # Set up outputs for file and stdout and create emitters file_output = twiggy.outputs.FileOutput(log_path, format=file_format) @@ -61,6 +71,10 @@ def __call__(self, msg): return file_output, std_output +def _logger_better_time(gmtime=None): + return time.strftime("%Y/%m/%d %H:%M:%S %p", time.localtime()) + + def _installThreadExcepthook(): """ Make threads use sys.excepthook from parent process @@ -74,6 +88,7 @@ def run_with_except_hook(*args, **kw): try: run_old(*args, **kw) except (KeyboardInterrupt, SystemExit): + raise except: sys.excepthook(*sys.exc_info()) From ad836bf0942aaef85bdffd3f68ef4462f9eaf549 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 14:58:22 -0700 Subject: [PATCH 102/343] Uncomment shapely import, add note to check works with pyinstaller --- macroeco/empirical.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/macroeco/empirical.py b/macroeco/empirical.py index 2f9d244..76ed47b 100644 --- a/macroeco/empirical.py +++ b/macroeco/empirical.py @@ -38,7 +38,8 @@ import itertools from copy import deepcopy import scipy.spatial.distance as dist -#import shapely.geometry as geo +import shapely.geometry as geo +# TODO: Make shapely import work with pyinstaller from misc import doc_sub From 1df26e424ba47b5c290933cf06f4bfa698f0c10f Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 14:58:52 -0700 Subject: [PATCH 103/343] Note database method likely broken --- macroeco/empirical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macroeco/empirical.py b/macroeco/empirical.py index 76ed47b..94cbe98 100644 --- a/macroeco/empirical.py +++ b/macroeco/empirical.py @@ -197,6 +197,7 @@ def _get_db_table(self, data_path, type): The database query as a recarray """ + # TODO: This is probably broken # Load table if type == 'sql': @@ -1236,4 +1237,3 @@ def _product(*args, **kwds): for pool in pools: result = 
[x+[y] for x in result for y in pool] return result - From 117ee006643bb802bb1d5dd01ec723c1603c1986 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 14:59:09 -0700 Subject: [PATCH 104/343] Small cleanup and comments to sad and ssad --- macroeco/empirical.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/macroeco/empirical.py b/macroeco/empirical.py index 94cbe98..c142e55 100644 --- a/macroeco/empirical.py +++ b/macroeco/empirical.py @@ -298,32 +298,33 @@ def sad(patch, cols, splits='', clean=True): """ - # Get required variables - spp_col, count_col = ( - [cols.get(x, None) for x in ['spp_col', 'count_col']] ) - full_spp_list = ( - np.unique(patch.table[spp_col]) ) + full_spp_list = np.unique(patch.table[spp_col]) - # Run analysis + # Loop through each split result_list = [] for substring, subtable in _yield_subtables(patch, splits): + # Get abundance for each species sad_list = [] for spp in full_spp_list: - this_spp = (subtable[spp_col] == spp) + this_spp = (subtable[cols['spp_col']] == spp) if count_col: - count = np.sum(subtable[count_col][this_spp]) + count = np.sum(subtable[cols['count_col']][this_spp]) else: count = np.sum(this_spp) sad_list.append(count) + # Create dataframe of spp names and abundances subdf = pd.DataFrame({'spp': full_spp_list, 'y': sad_list}) + # Remove zero abundance rows if requested if clean: subdf = subdf[subdf['y'] > 0] + # Append split result result_list.append((substring, subdf)) + # Return all results return result_list @@ -349,24 +350,28 @@ def ssad(patch, cols, splits=''): """ + # Get and check SAD sad_results = sad(patch, cols, splits, clean=False) if len(sad_results) == 1: raise ValueError, ("SSAD requires patch to be split into more than " "one subplot") + # Create dataframe with col for spp name and numbered col for each split for i, sad_result in enumerate(sad_results): if i == 0: # For first result, create dataframe fulldf = sad_result[1] fulldf.columns = ['spp', '0'] # Renames y col to 0 - else: # For other results, append col to dataframe + else: # For other results, append col to dataframe, named by num fulldf[str(i)] = sad_result[1]['y'] + # Get each spp SSAD (row of fulldf) and append as tuple in result_list result_list = [] - for row in fulldf.iterrows(): # Grab result for each species by row - row_values_array = np.array(row[1][1:], dtype=float) - result_list.append((row[1][0], pd.DataFrame({'y': row_values_array}))) + for _, row in fulldf.iterrows(): + row_values_array = np.array(row[1:], dtype=float) + result_list.append((row[0], pd.DataFrame({'y': row_values_array}))) + # Return all results return result_list From e8f7f520bfc8dbd9ef8626e3bd899656157b9d58 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 15:47:07 -0700 Subject: [PATCH 105/343] Remove data module --- macroeco/__init__.py | 5 +- macroeco/data.py | 370 ------------------------------------------ macroeco/test_data.py | 129 --------------- 3 files changed, 1 insertion(+), 503 deletions(-) delete mode 100644 macroeco/data.py delete mode 100644 macroeco/test_data.py diff --git a/macroeco/__init__.py b/macroeco/__init__.py index 5f9b4d5..1f9b5f5 100644 --- a/macroeco/__init__.py +++ b/macroeco/__init__.py @@ -7,8 +7,5 @@ __email__ = "jkitzes@berkeley.edu" __status__ = "Development" -import compare -import data import empirical -import output -import utils.workflow as workflow +import compare diff --git a/macroeco/data.py b/macroeco/data.py deleted file mode 100644 index f0f6bd9..0000000 --- 
a/macroeco/data.py +++ /dev/null @@ -1,370 +0,0 @@ -#!/usr/bin/python - -''' -Routines for loading census data and metadata. - -Classes -------- -- `DataTable` -- data and metadata for a single censused area -- `Metadata` -- load and parse EML metadata for data file -''' - -from __future__ import division -import os -import logging -import numpy as np -import xml.etree.ElementTree as etree -from matplotlib.mlab import csv2rec -import sqlite3 as lite - - -class DataTable: - ''' - Class to hold data table and metadata. - - Parameters - ---------- - data_path : str - Path to data - location of metadata determined from this path. - subset : str - An SQL query string - - Attributes - ---------- - asklist : list - A list of tuples of column name and attribute, e.g., [('x', - 'precision'), ('y', 'maximum')], that defines the columns and - parameters that are needed for analysis. Defined in data_load method. - table : recarray - Census data table. - meta : dict - Dictionary of metadata needed for analysis. Needed variables for each - column are defined in asklist - ''' - - def __init__(self, data_path, subset={}): - '''Initialize DataTable object. See class docstring.''' - - self.table, self.meta = self.data_load(data_path, subset=subset) - - - def data_load(self, data_path, subset={}): - ''' - Load data and metadata from files. - - Parameters - ---------- - data_path : str - Path to data table file. - - Returns - ------- - table : recarray - Census data table. - meta : dict - Dictionary of metadata associated with table. - ''' - end = data_path.split('.')[-1] - # Check that file is csv. If so, read in as rec array - if end == 'csv': - table = csv2rec(data_path) - # Load main table - dtype detected automatically - # Use panda to load and convert to records - #table = pd.read_csv(data_path) - # Check if there is a column named index, if so rename and delete - # it. Why? Index is a special word - #if hasattr(table, 'index'): - # table['index_1'] = table['index'] - # del table['index'] - - #table = table.to_records() - - elif end == 'db' or end == 'sql': - - if type(subset) == type({}): - raise ValueError('No SQL query string provided') - - table = db_table(data_path, subset) - else: - raise TypeError('Cannot handle file of type %s' % end) - - # Store asklist defining columns and fields needed for analysis. - # asklist is - self.asklist = [] - for name in table.dtype.names: - self.asklist.append((name, 'minimum')) - self.asklist.append((name, 'maximum')) - self.asklist.append((name, 'precision')) - self.asklist.append((name, 'type')) - - # Load metadata from file - meta = Metadata(data_path, self.asklist).meta_dict - - return table, meta - - - def get_subtable(self, subset): - ''' - Return subtable matching all conditions in subset. - - Parameters - ---------- - subset : dict - Dictionary of conditions for subsetting data (see description in - Patch Class docstring). - - Returns - ------- - subtable : ndarray - Subtable with records from table meeting requirements in subset. - - ''' - - # If no subset, return original table - if subset == {}: - return self.table - - # Declare array to track valid rows of table - valid = np.ones(len(self.table), dtype=bool) - - # TODO: Add ability to do logical or - and is just multiple subsets on - # same column. 
- for key, value in subset.iteritems(): - if type(value) is not type(['a']): # Make all iterables - value = [value] - - # Merge tuples into a string - merged_values = [] - for val in value: - try: # check if val[1] is a string - eval(str(val[1])) - merged_values.append(val[0] + str(val[1])) - except: - merged_values.append(val[0] + "'" + val[1] + "'") - - for this_value in merged_values: - if this_value != "=='whole'": - this_valid = eval("self.table[key]" + this_value) - valid = np.logical_and(valid, this_valid) - - subtable = self.table[valid] - return subtable - - -class Metadata: - ''' - Metadata values for any analysis stored using Ecological Metadata Language. - - Parameters - ---------- - data_path : str - Path to csv data file. Metadata file must be in same dir, with same - filename, but with .xml extension. - - Attributes - ---------- - valid_file : bool - Whether valid metadata file was found. - root : object - Root of Element Tree representation of metadata xml file. - meta_dict : dict - Dictionary of metadata with values given by asklist. - - ''' - - def __init__(self, data_path, asklist): - '''Initialize Metadata object. See class docstring.''' - - # Get path to metadata file - data_path, data_extension = os.path.splitext(data_path) - xml_path = os.path.abspath(os.path.join(data_path + '.xml')) - - # Determine if metadata file is valid and if so store self.root - self.valid_file = True - - try: - open(xml_path) - except: - logging.info('Missing or invalid metadata file at %s' % xml_path) - self.valid_file = False - - try: - self.root = etree.ElementTree(file=xml_path).getroot() - except: - logging.info('Error parsing metadata file at %s' % xml_path) - self.root = None - self.valid_file = False - - # Check if metadata file is missing or invalid, if so return None - if self.valid_file == False: - self.meta_dict = None - else: - self.meta_dict = self.get_meta_dict(asklist) - - - def get_meta_dict(self, asklist): - ''' - Parse metadata dictionary from xml file. - - Parameters - ---------- - asklist : list - A list of tuples of column name and attribute, e.g., [('x', - 'precision'), ('y', 'maximum')], that defines the columns and - parameters that are needed for analysis. - - Returns - ------- - meta_dict : dict - Dictionary of metadata values for each item in asklist, in form - {('column_name', 'element'): value}. column_name in data table is - equivalent to attribute in xml. - ''' - - # TODO: Column attribute will be None if either column entry does not - # exist in metadata or if column entry exists but attribute is missing. - # We may want to distinguish these, perhaps just with logging. 
- - # Populate dictionary of metadata values for asklist items - meta_dict = {} - - for item in asklist: - # Get list of all elements for this attribute - all_elements = self.get_all_elements(item[0]) - - # Get value of element for this attribute if it exists - if all_elements is None: - value = None - else: - value = self.get_element_value(all_elements, item[1], item[0]) - - # Eval value if possible and log outcome - try: - value = eval(value) - value_type = str(type(value)).split("'")[1] - logging.debug('Metadata value %s, %s evaluated to %s' % - (item[0], item[1], value_type)) - except: - logging.debug('Metadata value %s, %s left as string' % - (item[0], item[1])) - - # Store value for this item - meta_dict[item] = value - - return meta_dict - - - def get_all_elements(self, attribute): - '''Returns list of XML elements of type attribute for attribute.''' - - attributes = self.root.findall('.//dataTable/attributeList/attribute') - for a in attributes: - if a.find('.//attributeName').text == attribute: - return a - - - def get_element_value(self, all_elements, element_name, col_name): - '''Returns value of attribute_name from all_attributes list.''' - if element_name == 'type': - if len(all_elements.findall('.//dateTime')) == 1: - return 'ordinal' - elif len(all_elements.findall('.//interval')) == 1: - return 'interval' - elif len(all_elements.findall('.//ordinal')) == 1: - return 'ordinal' - elif len(all_elements.findall('.//nominal')) == 1: - return 'nominal' - elif len(all_elements.findall('.//ratio')) == 1: - return 'ratio' - else: - logging.warning("Could not find recognizable column type. " +\ - "Setting type of column name '%s' to ordinal." %\ - col_name) - return 'ordinal' - else: - try: - value = all_elements.find('.//%s' % element_name).text - return value - except AttributeError: - return None - - - def get_physical_coverage(self): - '''Returns a tuple of physical limits of the dataset (NESW).''' - coords = self.root.find('.//coverage/geographicCoverage/' + - 'boundingCoordinates') - bounds = [] - for d in ('north','east','south','west'): - bounds.append(float(coords.find('%sBoundingCoordinate'%d).text)) - return bounds - - - def get_title(self): - '''Extracts the title of the dataset. Not currently used.''' - return self.root.find('.//dataset/title').text - -def db_table(data_path, query_str): - '''Query a database and return query result as a recarray - - Parameters - ---------- - data_path : str - The data_path of the .db file - query_str : str - The SQL query string - - Returns - ------- - table : recarray - The database query as a recarray - - ''' - - end = data_path.split('.')[-1] - - if end == 'sql': - - def readData(): - f = open(data_path, 'r') - - with f: - data = f.read() - return data - - con = lite.connect(':memory:') - con.row_factory = lite.Row - - cur = con.cursor() - sql = readData() - cur.executescript(sql) - - elif end == 'db': - - con = lite.connect(data_path) - - con.row_factory = lite.Row - cur = con.cursor() - - cur.execute(query_str) - db_info = cur.fetchall() - try: - col_names = db_info[0].keys() - except IndexError: - raise lite.OperationalError("Query '%s' to database '%s' is empty" % - (query_str, data_path)) - - # Convert objects to tuples - converted_info = [tuple(x) for x in db_info] - - # NOTE: Using default value for Unicode: Seems better than checking - # lengths. Should we keep the type as unicode? 
- dtypes = [type(x) if type(x) != unicode else 'S150' for x in db_info[0]] - - table = np.array(converted_info, dtype=zip(col_names, dtypes)) - con.commit() - con.close() - - # Return a recarray for consistency - return table.view(np.recarray) - - diff --git a/macroeco/test_data.py b/macroeco/test_data.py deleted file mode 100644 index 1c2c0ab..0000000 --- a/macroeco/test_data.py +++ /dev/null @@ -1,129 +0,0 @@ -''' -Unit tests for data.py -''' - -import unittest -import os -import numpy as np -from matplotlib.mlab import csv2rec -from macroeco.data import DataTable, Metadata - -class TestDataTable(unittest.TestCase): - - def setUp(self): - '''Write test xytable csv file.''' - - self.xyfile1 = open('xyfile1.csv','w') - self.xyfile1.write('''spp_code, x, y, count - 0, 0, 0, 1 - 0, 0, 0, 2 - 0, 0, 1, 1 - 1, 0, 0, 1 - 1, 1, 0, 2''') - self.xyfile1.close() - self.xyarr1 = csv2rec('xyfile1.csv') - - def tearDown(self): - os.remove('xyfile1.csv') - - def test_error_if_file_type_not_csv(self): - self.assertRaises(TypeError, DataTable, 'file.txt') - - def test_meta_None_if_no_meta_file(self): - xy1 = DataTable('xyfile1.csv') - self.assertEqual(xy1.meta, None) - - def test_table_is_correct(self): - xy1 = DataTable('xyfile1.csv') - np.testing.assert_array_equal(xy1.table, self.xyarr1) - - def test_get_subtable(self): - xy1 = DataTable('xyfile1.csv') - xy1.meta = {('x', 'maximum'): 1, - ('x', 'minimum'): 0, - ('x', 'precision'): 1, - ('y', 'maximum'): 1, - ('y', 'minimum'): 0, - ('y', 'precision'): 1} - - # Whole table - sub = xy1.get_subtable({}) - np.testing.assert_array_equal(sub, self.xyarr1) - - sub = xy1.get_subtable({'x': [('>=', 0),('<', 2)], 'y': [('>=', 0), - ('<', 2)]}) - np.testing.assert_array_equal(sub, self.xyarr1) - - # Subset - sub = xy1.get_subtable({'spp_code': ('==', 0)}) - np.testing.assert_array_equal(sub, self.xyarr1[0:3]) - - sub = xy1.get_subtable({'spp_code': ('==', 0), 'x': ('>', 0)}) - np.testing.assert_array_equal(sub, self.xyarr1[2]) - -class TestMetadata(unittest.TestCase): - - def setUp(self): - '''Write test data and metadata file.''' - - self.xyfile1 = open('xyfile1.csv','w') - self.xyfile1.write('''x, y - 0, 0 - 0, 0 - 0, 0 - 1, 0 - 1, 1''') - self.xyfile1.close() - - self.xymeta = open('xyfile1.xml','w') - self.xymeta.write(''' - - -Unittest XML - -NA --79.5915 --79.5915 -8.975 -10 - - - - -y -cell -x0.0 -99.90.1 -''') - self.xymeta.close() - - def tearDown(self): - os.remove('xyfile1.csv') - os.remove('xyfile1.xml') - - def test_metadata_correct_read(self): - # Should read values correctly from sample file, including None for - # attributes that do not exist and elements that do not exist. 
- xy1 = DataTable('xyfile1.csv') - self.assertEqual(len(xy1.meta), 8) - self.assertEqual(xy1.meta, {('x', 'maximum'): 99.9, - ('x', 'minimum'): 0.0, - ('x', 'precision'): 0.1, - ('x', 'type'): 'interval', - ('y', 'maximum'): None, - ('y', 'minimum'): None, - ('y', 'precision'): None, - ('y', 'type'): 'ordinal'}) - - def test_physical_coverage(self): - meta = Metadata('xyfile1.csv', []) - edges = meta.get_physical_coverage() - self.assertEqual(edges, [8.975, -79.5915, 10, -79.5915]) - - def test_title(self): - meta = Metadata('xyfile1.csv', []) - self.assertEqual(meta.get_title(), 'Unittest XML') From b3455a8cb8bcd25d59c9b8d55fb063500482d56d Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 15:52:49 -0700 Subject: [PATCH 106/343] Rename distributions2 to models --- macroeco/main.py | 2 +- macroeco/{distributions2.py => models.py} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename macroeco/{distributions2.py => models.py} (99%) diff --git a/macroeco/main.py b/macroeco/main.py index a1dcb17..eda3fe2 100644 --- a/macroeco/main.py +++ b/macroeco/main.py @@ -33,7 +33,7 @@ from misc import get_log import empirical as emp -import distributions2 as mod +import models as mod import compare as comp diff --git a/macroeco/distributions2.py b/macroeco/models.py similarity index 99% rename from macroeco/distributions2.py rename to macroeco/models.py index 1aa8426..36dafc2 100644 --- a/macroeco/distributions2.py +++ b/macroeco/models.py @@ -1,6 +1,6 @@ """ ============================================== -Distributions (:mod:`macroeco.distributions2`) +Models (:mod:`macroeco.models`) ============================================== This module contains distributions commonly used in analysis of ecological From f357301719ab607e174a94747c9204d2365b166f Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 15:53:47 -0700 Subject: [PATCH 107/343] Clean up title line lengths in module docstrings --- macroeco/compare.py | 4 ++-- macroeco/empirical.py | 4 ++-- macroeco/models.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/macroeco/compare.py b/macroeco/compare.py index 4cc3c3a..0b288b7 100644 --- a/macroeco/compare.py +++ b/macroeco/compare.py @@ -1,7 +1,7 @@ """ -=========================== +================================= Compare (:mod:`macroeco.compare`) -=========================== +================================= This module contains functions that compare the goodness of fit of a distribution/curve to data or the fit of two distributions/curves to each diff --git a/macroeco/empirical.py b/macroeco/empirical.py index c142e55..48d7192 100644 --- a/macroeco/empirical.py +++ b/macroeco/empirical.py @@ -1,7 +1,7 @@ """ -============================================== +===================================== Empirical (:mod:`macroeco.empirical`) -============================================== +===================================== This module contains functions used in the empirical analysis of macroecological patterns. diff --git a/macroeco/models.py b/macroeco/models.py index 36dafc2..51006a6 100644 --- a/macroeco/models.py +++ b/macroeco/models.py @@ -1,7 +1,7 @@ """ -============================================== +=============================== Models (:mod:`macroeco.models`) -============================================== +=============================== This module contains distributions commonly used in analysis of ecological patterns. At present, all distributions here are univariate. 
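The patch above is a pure rename; a minimal sketch of the import change it implies for downstream code follows (the old and new module paths come from the diff itself, while the `nbinom` usage is an assumption based on the distribution names listed in the module docstring):

    # Before this patch:
    #   from macroeco import distributions2 as mod
    # After this patch:
    from macroeco import models as mod

    dist = mod.nbinom   # distribution classes keep the same names
    # e.g. dist.pmf(x, *shapes), as called from main.py elsewhere in this series
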
From fcfcada64f120c5077b3acbcd70ffce5c8a6921b Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 16:05:54 -0700 Subject: [PATCH 108/343] Move non-deprecated modules to subdirs --- macroeco/{ => compare}/compare.py | 0 macroeco/{ => compare}/test_compare.py | 0 macroeco/{ => empirical}/empirical.py | 0 macroeco/{ => empirical}/test_empirical.py | 0 macroeco/{ => main}/main.py | 0 macroeco/{ => misc}/misc.py | 0 macroeco/{ => models}/models.py | 0 macroeco/{test_distributions2.py => models/test_models.py} | 0 8 files changed, 0 insertions(+), 0 deletions(-) rename macroeco/{ => compare}/compare.py (100%) rename macroeco/{ => compare}/test_compare.py (100%) rename macroeco/{ => empirical}/empirical.py (100%) rename macroeco/{ => empirical}/test_empirical.py (100%) rename macroeco/{ => main}/main.py (100%) rename macroeco/{ => misc}/misc.py (100%) rename macroeco/{ => models}/models.py (100%) rename macroeco/{test_distributions2.py => models/test_models.py} (100%) diff --git a/macroeco/compare.py b/macroeco/compare/compare.py similarity index 100% rename from macroeco/compare.py rename to macroeco/compare/compare.py diff --git a/macroeco/test_compare.py b/macroeco/compare/test_compare.py similarity index 100% rename from macroeco/test_compare.py rename to macroeco/compare/test_compare.py diff --git a/macroeco/empirical.py b/macroeco/empirical/empirical.py similarity index 100% rename from macroeco/empirical.py rename to macroeco/empirical/empirical.py diff --git a/macroeco/test_empirical.py b/macroeco/empirical/test_empirical.py similarity index 100% rename from macroeco/test_empirical.py rename to macroeco/empirical/test_empirical.py diff --git a/macroeco/main.py b/macroeco/main/main.py similarity index 100% rename from macroeco/main.py rename to macroeco/main/main.py diff --git a/macroeco/misc.py b/macroeco/misc/misc.py similarity index 100% rename from macroeco/misc.py rename to macroeco/misc/misc.py diff --git a/macroeco/models.py b/macroeco/models/models.py similarity index 100% rename from macroeco/models.py rename to macroeco/models/models.py diff --git a/macroeco/test_distributions2.py b/macroeco/models/test_models.py similarity index 100% rename from macroeco/test_distributions2.py rename to macroeco/models/test_models.py From 6b81177b692f52d9ee1bf040c039d42dc8d95476 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 16:23:12 -0700 Subject: [PATCH 109/343] Finish converting distributions2 to models in doc --- doc/distributions2.rst | 1 - doc/index.rst | 2 +- doc/models.rst | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) delete mode 100644 doc/distributions2.rst create mode 100644 doc/models.rst diff --git a/doc/distributions2.rst b/doc/distributions2.rst deleted file mode 100644 index be356a6..0000000 --- a/doc/distributions2.rst +++ /dev/null @@ -1 +0,0 @@ -.. automodule:: macroeco.distributions2 diff --git a/doc/index.rst b/doc/index.rst index ae9224e..35ad3ca 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -12,7 +12,7 @@ Welcome to macroeco. :maxdepth: 2 empirical - distributions2 + models compare diff --git a/doc/models.rst b/doc/models.rst new file mode 100644 index 0000000..a21b662 --- /dev/null +++ b/doc/models.rst @@ -0,0 +1 @@ +.. 
automodule:: macroeco.models From dd231c8efd9d751413640b6b11e8066c261aaf00 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 16:44:02 -0700 Subject: [PATCH 110/343] Update all __init__ and imports to package syntax --- macroeco/__init__.py | 13 +++++++---- macroeco/compare/__init__.py | 29 ++++++++++++++++++++++++ macroeco/compare/compare.py | 29 ------------------------ macroeco/empirical/__init__.py | 31 +++++++++++++++++++++++++ macroeco/empirical/empirical.py | 31 +------------------------ macroeco/misc/__init__.py | 1 + macroeco/models/__init__.py | 40 +++++++++++++++++++++++++++++++++ macroeco/models/models.py | 40 +-------------------------------- 8 files changed, 112 insertions(+), 102 deletions(-) create mode 100644 macroeco/compare/__init__.py create mode 100644 macroeco/empirical/__init__.py create mode 100644 macroeco/misc/__init__.py create mode 100644 macroeco/models/__init__.py diff --git a/macroeco/__init__.py b/macroeco/__init__.py index 1f9b5f5..dd7c5fa 100644 --- a/macroeco/__init__.py +++ b/macroeco/__init__.py @@ -1,11 +1,16 @@ -__author__ = "Justin Kitzes, Mark Wilber, Chloe Lewis" -__copyright__ = "Copyright 2012, Regents of University of California" -__credits__ = [] +""" +Macroeco: Ecological pattern analysis in Python +""" + +__author__ = "Justin Kitzes and Mark Wilber" +__copyright__ = "Copyright 2012-2014, Regents of University of California" __license__ = "BSD 2-clause" -__version__ = "0.2" +__version__ = "0.3" __maintainer__ = "Justin Kitzes" __email__ = "jkitzes@berkeley.edu" __status__ = "Development" import empirical +import models import compare +import misc diff --git a/macroeco/compare/__init__.py b/macroeco/compare/__init__.py new file mode 100644 index 0000000..e6eba16 --- /dev/null +++ b/macroeco/compare/__init__.py @@ -0,0 +1,29 @@ +""" +================================= +Compare (:mod:`macroeco.compare`) +================================= + +This module contains functions that compare the goodness of fit of a +distribution/curve to data or the fit of two distributions/curves to each +other. + +Comparison Functions +==================== + +.. autosummary:: + :toctree: generated/ + + get_AIC + get_AICC + get_AIC_weights + get_nll + get_empirical_cdf + get_sum_of_squares + get_r_squared + get_chi_squared + get_lrt + bin_data + +""" + +from .compare import * diff --git a/macroeco/compare/compare.py b/macroeco/compare/compare.py index 0b288b7..50b31e4 100644 --- a/macroeco/compare/compare.py +++ b/macroeco/compare/compare.py @@ -1,31 +1,3 @@ -""" -================================= -Compare (:mod:`macroeco.compare`) -================================= - -This module contains functions that compare the goodness of fit of a -distribution/curve to data or the fit of two distributions/curves to each -other. - -Comparison Functions -==================== - -.. autosummary:: - :toctree: generated/ - - get_AIC - get_AICC - get_AIC_weights - get_nll - get_empirical_cdf - get_sum_of_squares - get_r_squared - get_chi_squared - get_lrt - bin_data - -""" - from __future__ import division import numpy as np @@ -33,7 +5,6 @@ import scipy.stats as stats import pandas as pd -from distributions import * # NOTE: get_* functions usually refer to a method within a class. 
I would # suggest dropping all of the get prefixes diff --git a/macroeco/empirical/__init__.py b/macroeco/empirical/__init__.py new file mode 100644 index 0000000..31f6f14 --- /dev/null +++ b/macroeco/empirical/__init__.py @@ -0,0 +1,31 @@ +""" +===================================== +Empirical (:mod:`macroeco.empirical`) +===================================== + +This module contains functions used in the empirical analysis of +macroecological patterns. + +Patch +===== + +Patch is a class. + +.. autosummary:: + :toctree: generated/ + + Patch + +Metrics +======= + +.. autosummary:: + :toctree: generated/ + + sad + ssad + +""" + +from .empirical import (Patch, + sad, ssad) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index 48d7192..8927db1 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -1,32 +1,3 @@ -""" -===================================== -Empirical (:mod:`macroeco.empirical`) -===================================== - -This module contains functions used in the empirical analysis of -macroecological patterns. - -Patch -===== - -Patch is a class. - -.. autosummary:: - :toctree: generated/ - - Patch - -Metrics -======= - -.. autosummary:: - :toctree: generated/ - - sad - ssad - -""" - from __future__ import division import os import numpy as np @@ -41,7 +12,7 @@ import shapely.geometry as geo # TODO: Make shapely import work with pyinstaller -from misc import doc_sub +from ..misc import doc_sub metric_params = \ """patch : Patch obj diff --git a/macroeco/misc/__init__.py b/macroeco/misc/__init__.py new file mode 100644 index 0000000..0789bdb --- /dev/null +++ b/macroeco/misc/__init__.py @@ -0,0 +1 @@ +from .misc import * diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py new file mode 100644 index 0000000..f3cf781 --- /dev/null +++ b/macroeco/models/__init__.py @@ -0,0 +1,40 @@ +""" +=============================== +Models (:mod:`macroeco.models`) +=============================== + +This module contains distributions commonly used in analysis of ecological +patterns. At present, all distributions here are univariate. + +Most of these distributions are subclasses of `~scipy.stats.rv_continuous` and +`~scipy.stats.rv_discrete` found in `scipy.stats`. Additionally, several of the +distribution classes here are simple wrappers for existing distributions found +in `scipy.stats` that are updated to allow the use of common ecological +parameterizations. + +Continouous distributions +========================= + +.. autosummary:: + :toctree: generated/ + + expon + expon_uptrunc + +Discrete distributions +====================== + +.. autosummary:: + :toctree: generated/ + + geom + geom_uptrunc + nbinom + +.. DV: + Our public-facing distributions do not use location and scale parameters, as + they are not common in quantitative ecology. +""" + +from models import (geom, geom_uptrunc, nbinom, + expon, expon_uptrunc) diff --git a/macroeco/models/models.py b/macroeco/models/models.py index 51006a6..dd406c4 100644 --- a/macroeco/models/models.py +++ b/macroeco/models/models.py @@ -1,41 +1,3 @@ -""" -=============================== -Models (:mod:`macroeco.models`) -=============================== - -This module contains distributions commonly used in analysis of ecological -patterns. At present, all distributions here are univariate. - -Most of these distributions are subclasses of `~scipy.stats.rv_continuous` and -`~scipy.stats.rv_discrete` found in `scipy.stats`. 
Additionally, several of the -distribution classes here are simple wrappers for existing distributions found -in `scipy.stats` that are updated to allow the use of common ecological -parameterizations. - -Continouous distributions -========================= - -.. autosummary:: - :toctree: generated/ - - expon - expon_uptrunc - -Discrete distributions -====================== - -.. autosummary:: - :toctree: generated/ - - geom - geom_uptrunc - nbinom - -.. DV: - Our public-facing distributions do not use location and scale parameters, as - they are not common in quantitative ecology. -""" - from __future__ import division from decimal import Decimal @@ -48,7 +10,7 @@ import scipy.optimize as optim import scipy.special as special -from misc import inherit_docstring_from +from ..misc import inherit_docstring_from _doc_default_callparams = \ """ From 7b9e0683fb60aa0bc275050b1cf7377379f6af9e Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 16:46:06 -0700 Subject: [PATCH 111/343] Remove all get_ prefixes from compare --- macroeco/compare/compare.py | 32 ++++++++++++++------------------ macroeco/compare/test_compare.py | 32 ++++++++++++++++---------------- 2 files changed, 30 insertions(+), 34 deletions(-) diff --git a/macroeco/compare/compare.py b/macroeco/compare/compare.py index 50b31e4..b29ee5c 100644 --- a/macroeco/compare/compare.py +++ b/macroeco/compare/compare.py @@ -6,11 +6,7 @@ import pandas as pd -# NOTE: get_* functions usually refer to a method within a class. I would -# suggest dropping all of the get prefixes - - -def get_nll(values): +def nll(values): """ Calculate negative log likelihood from an array of pdf/pmf values. """ @@ -19,17 +15,17 @@ def get_nll(values): return -np.sum(np.log(values)) -def get_AIC(values, params): +def AIC(values, params): """ Calculate AIC given values of a pdf/pmf and a set of model parameters. """ values, params = _to_arrays(values, params) k = len(params) # Num parameters - L = get_nll(values) + L = nll(values) return 2 * k + 2 * L -def get_AICC(values, params): +def AICC(values, params): """ Calculate AICC given values of a pdf/pmf and a set of model parameters. @@ -49,10 +45,10 @@ def get_AICC(values, params): values, params = _to_arrays(values, params) k = len(params) # Num parameters n = len(values) # Num observations - return get_AIC(values, params) + (2 * k * (k + 1)) / (n - k - 1) + return AIC(values, params) + (2 * k * (k + 1)) / (n - k - 1) -def get_AIC_weights(aic_values): +def AIC_weights(aic_values): """ Calculates the aic_weights for a given set of models. @@ -82,7 +78,7 @@ def get_AIC_weights(aic_values): return weights, delta -def get_empirical_cdf(data): +def empirical_cdf(data): """ Generates an empirical cdf from empirical data @@ -146,10 +142,10 @@ def total_loss(self, obs, pred): obs, pred = _to_arrays(obs, pred) return np.sum(eval(self.loss_fxn)) -get_sum_of_squares = gen_loss_function('(obs - pred)**2').total_loss +sum_of_squares = gen_loss_function('(obs - pred)**2').total_loss -def get_r_squared(obs, pred): +def r_squared(obs, pred): """ Get's the R^2 value for a regression of observed data (X) and predicted (Y) @@ -169,7 +165,7 @@ def get_r_squared(obs, pred): return r ** 2 -def get_ks_two_sample(): +def ks_two_sample(): """ Two sample Kolmogorov Smirnov distribution. 
Uses the cumulative distribution functions to test whether two samples were drawn from the same @@ -181,11 +177,11 @@ def get_ks_two_sample(): pass -def get_ks_one_sample(): +def ks_one_sample(): pass -def get_lrt(nll_null, nll_alt, df): +def lrt(nll_null, nll_alt, df): """ This functions compares two nested models using the likelihood ratio test. @@ -226,11 +222,11 @@ def get_lrt(nll_null, nll_alt, df): test_stat = -2 * (ll_null - ll_alt) return (test_stat, stats.chisqprob(test_stat, df)) -def get_bayes_factor(): +def bayes_factor(): pass -def get_chi_squared(dists): +def chi_squared(dists): """ Chi-squared test to compare two or more distributions. diff --git a/macroeco/compare/test_compare.py b/macroeco/compare/test_compare.py index 73ae2d8..99cc09b 100644 --- a/macroeco/compare/test_compare.py +++ b/macroeco/compare/test_compare.py @@ -23,7 +23,7 @@ def test_nll(self): # Test against R result: sum(dnorm(c(1,2,3,4,5), log=TRUE)) R_res = 32.09469 test_vals = stats.norm.pdf((1, 2, 3, 4, 5)) - lglk = get_nll(test_vals) + lglk = nll(test_vals) assert_equal(R_res, np.round(lglk, decimals=5)) def test_empirical_cdf(self): @@ -33,24 +33,24 @@ def test_empirical_cdf(self): # Test Case 1 test_data = [1, 1, 1, 1, 2, 3, 4, 5, 6, 6] R_res = [.4, .4, .4, .4, .5, .6, .7, .8, 1, 1] - res = get_empirical_cdf(test_data) + res = empirical_cdf(test_data) assert_array_equal(R_res, res) # Test Case 2 test_data = [3, 3, 3, 3] R_res = [1, 1, 1, 1] - res = get_empirical_cdf(test_data) + res = empirical_cdf(test_data) assert_array_equal(R_res, res) def test_aic(self): test_vals = stats.norm.pdf((1, 2, 3, 4, 5, 6, 7, 8)) - aic1 = get_AIC(test_vals, (1, 1)) + aic1 = AIC(test_vals, (1, 1)) expected = 222.703016531 # Calculated by hand assert_equal(np.round(aic1, decimals=9), expected) test_vals = stats.gamma.pdf((1, 1, 1, 4, 5, 7, 12), 2) - aic1 = get_AIC(test_vals, (1, 1)) + aic1 = AIC(test_vals, (1, 1)) expected = 51.146902 assert_equal(np.round(aic1, decimals=6), expected) @@ -58,7 +58,7 @@ def test_aicc(self): # Test values test_vals = stats.norm.pdf((1, 2, 3, 4, 5, 6, 7, 8)) - aic1 = get_AICC(test_vals, (1, 1)) + aic1 = AICC(test_vals, (1, 1)) # Test that aicc gives the correct values expected = 225.10302 @@ -71,8 +71,8 @@ def test_aic_weights(self): values = [stats.norm.pdf(vals, scale=100), stats.norm.pdf(vals, scale=99)] - aic_vals = [get_AICC(tval, 1) for tval in values] - aicw, delta_aic = get_AIC_weights(aic_vals) + aic_vals = [AICC(tval, 1) for tval in values] + aicw, delta_aic = AIC_weights(aic_vals) pred = np.array([0.47909787, 0.52090213]) assert_array_almost_equal(aicw, pred) @@ -91,7 +91,7 @@ def test_gen_loss_function(self): # Test sum of squares loss function test_loss = np.sum((obs - pred) ** 2) - pred_loss = get_sum_of_squares(obs, pred) + pred_loss = sum_of_squares(obs, pred) assert_equal(test_loss, pred_loss) # Test MSE loss function @@ -106,7 +106,7 @@ def test_r_squared(self): # Already unittested in scipy. 
Checking for functionaliity test_data = np.random.randint(5, 100, 100) - rsq = get_r_squared(test_data, test_data) + rsq = r_squared(test_data, test_data) assert_equal(rsq, 1) def test_chi_squared(self): @@ -122,19 +122,19 @@ def test_chi_squared(self): bin1 = bin_data(dist1, np.max(bin_max))[0] bin2 = bin_data(dist2, np.max(bin_max))[0] - res = get_chi_squared([bin1, bin2]) + res = chi_squared([bin1, bin2]) # Check three distributions dist3 = stats.logser(p=p).rvs(100) bin3 = bin_data(dist3, np.max(bin_max))[0] - res = get_chi_squared([bin1, bin2, bin3]) + res = chi_squared([bin1, bin2, bin3]) # Check error is thrown with only one dist - assert_raises(AssertionError, get_chi_squared, [bin1]) + assert_raises(AssertionError, chi_squared, [bin1]) # Check error is thrown if bins are different lengths - assert_raises(AssertionError, get_chi_squared, [bin1, bin2[:-1]]) + assert_raises(AssertionError, chi_squared, [bin1, bin2[:-1]]) def test_bin_data(self): @@ -167,7 +167,7 @@ def test_bin_data(self): test_res = bin_data(data, max(data))[0] assert_array_equal(test_res, vegan) - def test_get_lrt(self): + def test_lrt(self): # Test against what the lrtest() R function returns model1 = 158.0494 @@ -175,7 +175,7 @@ def test_get_lrt(self): R_chisquare = 36.4868 R_p = 1.537e-09 - pred_chi, pred_p = get_lrt(model1, model0, 1) + pred_chi, pred_p = lrt(model1, model0, 1) assert_almost_equal(pred_chi, R_chisquare) assert_almost_equal(pred_p, R_p) From c91530d087e5e02daedab58f671b9ee01b800b1a Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 18:12:54 -0700 Subject: [PATCH 112/343] Mock shapely for RTD --- doc/conf.py | 2 +- doc/requirements.txt | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 7322dc2..2ed2206 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -115,7 +115,7 @@ def __getattr__(cls, name): else: return Mock() -MOCK_MODULES = ['mpltools'] +MOCK_MODULES = ['mpltools', 'shapely', 'shapely.geometry'] for mod_name in MOCK_MODULES: sys.modules[mod_name] = Mock() diff --git a/doc/requirements.txt b/doc/requirements.txt index d740b97..ee0488c 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -2,7 +2,6 @@ scipy numpy matplotlib pandas -shapely configparser twiggy numpydoc From a83dd9bfce43a898cf71c9e66192ad589a02b6c8 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 18:28:38 -0700 Subject: [PATCH 113/343] Remove get_ prefixes from compare docstring --- macroeco/compare/__init__.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/macroeco/compare/__init__.py b/macroeco/compare/__init__.py index e6eba16..f9fb864 100644 --- a/macroeco/compare/__init__.py +++ b/macroeco/compare/__init__.py @@ -13,15 +13,15 @@ .. 
autosummary:: :toctree: generated/ - get_AIC - get_AICC - get_AIC_weights - get_nll - get_empirical_cdf - get_sum_of_squares - get_r_squared - get_chi_squared - get_lrt + AIC + AICC + AIC_weights + nll + empirical_cdf + sum_of_squares + r_squared + chi_squared + lrt bin_data """ From 22a97ee39f04be46598b9cb3f486b5a8a5c85976 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 19:22:28 -0700 Subject: [PATCH 114/343] Add safe col extraction back to sad --- macroeco/empirical/empirical.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index 8927db1..9f70153 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -269,6 +269,8 @@ def sad(patch, cols, splits='', clean=True): """ + spp_col, count_col = ( + [cols.get(x, None) for x in ['spp_col', 'count_col']] ) full_spp_list = np.unique(patch.table[spp_col]) # Loop through each split @@ -278,9 +280,9 @@ def sad(patch, cols, splits='', clean=True): # Get abundance for each species sad_list = [] for spp in full_spp_list: - this_spp = (subtable[cols['spp_col']] == spp) + this_spp = (subtable[spp_col] == spp) if count_col: - count = np.sum(subtable[cols['count_col']][this_spp]) + count = np.sum(subtable[count_col][this_spp]) else: count = np.sum(this_spp) sad_list.append(count) From 0b7823cbb95eb8bd7e887046c79a40737f660efc Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 19:22:48 -0700 Subject: [PATCH 115/343] Note todo for empirical_cdf --- macroeco/compare/compare.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/macroeco/compare/compare.py b/macroeco/compare/compare.py index b29ee5c..3316452 100644 --- a/macroeco/compare/compare.py +++ b/macroeco/compare/compare.py @@ -93,6 +93,9 @@ def empirical_cdf(data): The empirical cdf corresponding to the inputted data """ + # TODO: This should return sorted data also, otherwise trying to match the + # input data to output does not correspond (result is sorted, data is not + # necessarily). vals = pd.Series(data).value_counts() ecdf = pd.DataFrame(data).set_index(keys=0) From c1b8eee6ada12060a030307c0a657d9cc011fdf1 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 19:23:25 -0700 Subject: [PATCH 116/343] Fix main to work with new package structure and compare --- macroeco/main/main.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index eda3fe2..0230325 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -31,10 +31,10 @@ mpl.rcParams['axes.color_cycle'] = ['0072B2','D55E00','CC79A7','009E73', 'E69F00','F0E442','56B4E9'] -from misc import get_log -import empirical as emp -import models as mod -import compare as comp +from ..misc import get_log +from .. import empirical as emp +from .. import models as mod +from .. 
import compare as comp def main(param_path='parameters.txt'): @@ -319,7 +319,7 @@ def _get_values(data, model, fits): return values def _get_comparison_statistic(data, fits): - return ['AIC'], [comp.get_AIC(data, fits)] + return ['AIC'], [comp.AIC(data, fits)] def _save_results(options, module, core_results, fit_results): @@ -466,8 +466,8 @@ def calc_func(model, df, shapes): df, calc_func, plot_exec_str) # CDF - # TODO: This goes up by integers to max value, can be too large - x, emp_cdf = comp.get_empirical_cdf(core_result['y'].values) + x = core_result['y'].values + emp_cdf = comp.empirical_cdf(x) df = pd.DataFrame({'x': x, 'empirical': emp_cdf}) def calc_func(model, df, shapes): @@ -543,6 +543,7 @@ def _pad_plot_frame(ax, pad=0.01): return ax - if __name__ == '__main__': + # To execute, run `python -m macroeco.main.main path/to/parameters.txt from + # the root macroeco directory. main(sys.argv[1]) From 4c856fb7511e071bf22c763a7719c9e69a5c54a8 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 19:31:56 -0700 Subject: [PATCH 117/343] Add reminder on how to run desktop within package --- macroeco/desktop.py | 1 + 1 file changed, 1 insertion(+) diff --git a/macroeco/desktop.py b/macroeco/desktop.py index eaf0de2..2f5624f 100755 --- a/macroeco/desktop.py +++ b/macroeco/desktop.py @@ -135,6 +135,7 @@ def OnIdle(self, event): self.run_button.Enable(True) # Turn the run button on if __name__ == '__main__': + # To execute, run `pythonw -m macroeco.desktop from root macroeco dir. app = wx.App(False) frame = MainWindow(None, 'Macroeco Desktop') app.MainLoop() From d74fc113b8f645f6426182cffaa75085874d6574 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 21:15:01 -0700 Subject: [PATCH 118/343] Add fallback check to metadata for cols variable --- macroeco/empirical/empirical.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index 9f70153..7470726 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -269,8 +269,7 @@ def sad(patch, cols, splits='', clean=True): """ - spp_col, count_col = ( - [cols.get(x, None) for x in ['spp_col', 'count_col']] ) + spp_col, count_col = _get_cols(['spp_col', 'count_col'], cols, patch) full_spp_list = np.unique(patch.table[spp_col]) # Loop through each split @@ -1122,7 +1121,18 @@ def z(doubleS, halfS): - +def _get_cols(special_cols_names, cols, patch): + """ + Retrieve values of special_cols from cols dict or Patch metadata + """ + special_cols_values = [] + for col in special_cols_names: + col_value = cols.get(col, None) + if col_value is None: + col_value = patch.meta['Description'].get(col, None) + special_cols_values.append(col_value) + + return tuple(special_cols_values) @doc_sub(splits_note) From 310432d540fb78d54e8dcba6c39eee378d304460 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 21:15:45 -0700 Subject: [PATCH 119/343] Expand logging with decorator and other changes --- macroeco/empirical/empirical.py | 10 +++++++--- macroeco/main/main.py | 9 +++++++-- macroeco/misc/misc.py | 27 ++++++++++++++++++++++----- 3 files changed, 36 insertions(+), 10 deletions(-) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index 7470726..1bba807 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -4,6 +4,8 @@ import pandas as pd from configparser import ConfigParser +from twiggy import log +log = log.name('emp ') from math 
import radians, cos, sin, asin, sqrt import itertools @@ -12,7 +14,7 @@ import shapely.geometry as geo # TODO: Make shapely import work with pyinstaller -from ..misc import doc_sub +from ..misc import doc_sub, log_start_end metric_params = \ """patch : Patch obj @@ -241,7 +243,7 @@ def _subset_table(full_table, subset): return full_table[valid] - +@log_start_end @doc_sub(metric_params, metric_return, cols_note, splits_note) def sad(patch, cols, splits='', clean=True): """ @@ -300,6 +302,7 @@ def sad(patch, cols, splits='', clean=True): return result_list +@log_start_end @doc_sub(metric_params, metric_return, cols_note, splits_note) def ssad(patch, cols, splits=''): """ @@ -1131,7 +1134,7 @@ def _get_cols(special_cols_names, cols, patch): if col_value is None: col_value = patch.meta['Description'].get(col, None) special_cols_values.append(col_value) - + return tuple(special_cols_values) @@ -1161,6 +1164,7 @@ def _yield_subtables(patch, splits): if splits: subset_list = _parse_splits(patch, splits) for subset in subset_list: + log.info('Analyzing split: %s' % subset) yield subset, _subset_table(patch.table, subset) else: yield '', patch.table diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 0230325..6802701 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -20,6 +20,8 @@ import shutil import inspect import configparser +from twiggy import log +log = log.name('meco') import numpy as np import pandas as pd @@ -31,7 +33,7 @@ mpl.rcParams['axes.color_cycle'] = ['0072B2','D55E00','CC79A7','009E73', 'E69F00','F0E442','56B4E9'] -from ..misc import get_log +from ..misc import setup_log from .. import empirical as emp from .. import models as mod from .. import compare as comp @@ -54,7 +56,7 @@ def main(param_path='parameters.txt'): param_dir = os.path.dirname(param_path) # Get logger and announce start - log = get_log(param_dir, clear=True) + log = setup_log(param_dir, clear=True) log.info('Starting analysis') # Read parameter file into params object @@ -72,6 +74,7 @@ def main(param_path='parameters.txt'): options['param_dir'] = os.path.abspath(param_dir) options['run_dir'] = os.path.join(param_dir, run_name) _do_analysis(options) + log.info('Finished run %s' % run_name) log.info('Finished analysis successfully') @@ -339,6 +342,8 @@ def _save_results(options, module, core_results, fit_results): """ + log.info("Saving all results") + # Ensure that output dir for this run exists and is empty shutil.rmtree(options['run_dir'], ignore_errors=True) os.makedirs(options['run_dir']) diff --git a/macroeco/misc/misc.py b/macroeco/misc/misc.py index 2f1a5ff..ee598a2 100644 --- a/macroeco/misc/misc.py +++ b/macroeco/misc/misc.py @@ -2,15 +2,18 @@ Set up logging """ -import twiggy -import traceback import sys import os -import time +import traceback import threading as thread +import twiggy +from twiggy import log +log = log.name('meco') +import decorator +import time -def get_log(log_dir, clear=False): +def setup_log(log_dir, clear=False): """ Set up and return logger object """ @@ -26,7 +29,6 @@ def get_log(log_dir, clear=False): ('stdout', twiggy.levels.INFO, None, std_output)) # Get logger - # TODO: Once modules are in subdirs, change to __name__ log = twiggy.log.name('meco') # Log uncaught exceptions (must occur after log declared) @@ -136,3 +138,18 @@ def dec(obj): obj.__doc__ = obj.__doc__.format(*sub) return obj return dec + +def log_start_end(f): + """ + Decorator to log start and end of function + + Use of decorator module here ensures that argspec will inspect wrapped + 
function, not the decorator itself. + http://micheles.googlecode.com/hg/decorator/documentation.html + """ + def inner(f, *args, **kwargs): + log.info('Starting %s' % f.__name__) + res = f(*args) + log.info('Finished %s' % f.__name__) + return res + return decorator.decorator(inner, f) From dab3275cced02771833f32308ca763f58ed909a3 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sun, 23 Mar 2014 21:16:30 -0700 Subject: [PATCH 120/343] Convert main into module also --- macroeco/main/__init__.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 macroeco/main/__init__.py diff --git a/macroeco/main/__init__.py b/macroeco/main/__init__.py new file mode 100644 index 0000000..c28a133 --- /dev/null +++ b/macroeco/main/__init__.py @@ -0,0 +1 @@ +from .main import main From 7b65b13910bcebbf72f7739f441541bc39eb4e8e Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Mon, 24 Mar 2014 12:03:28 -0700 Subject: [PATCH 121/343] Whitespace cleanup --- macroeco/desktop.py | 4 +-- macroeco/main/main.py | 72 +++++++++++++++++++++---------------------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/macroeco/desktop.py b/macroeco/desktop.py index 2f5624f..ed19de7 100755 --- a/macroeco/desktop.py +++ b/macroeco/desktop.py @@ -21,7 +21,7 @@ class RedirectText(object): def __init__(self,aWxTextCtrl): self.out=aWxTextCtrl - + def write(self,string): wx.CallAfter(self.out.WriteText, string) @@ -56,7 +56,7 @@ def InitUI(self): choose_button = wx.Button(self, label='Open') self.Bind(wx.EVT_BUTTON, self.OnOpen, choose_button) - + # Make attribute so easily modified by other methods self.choose_msg = wx.StaticText(self, label='') diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 6802701..c331de1 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -3,8 +3,8 @@ Main (:mod:`macroeco.main`) =========================== -This module contains functions that execute macroecological analyses specified -by user-generated `parameters.txt` configuration files. Instructions for +This module contains functions that execute macroecological analyses specified +by user-generated `parameters.txt` configuration files. Instructions for creating parameter files can be found here. .. autosummary:: @@ -30,7 +30,7 @@ from mpltools import style style.use('ggplot') import matplotlib as mpl # Colorblind safe palette -mpl.rcParams['axes.color_cycle'] = ['0072B2','D55E00','CC79A7','009E73', +mpl.rcParams['axes.color_cycle'] = ['0072B2','D55E00','CC79A7','009E73', 'E69F00','F0E442','56B4E9'] from ..misc import setup_log @@ -71,8 +71,8 @@ def main(param_path='parameters.txt'): for run_name in run_names: log.info('Starting run %s' % run_name) options = dict(params[run_name]) - options['param_dir'] = os.path.abspath(param_dir) - options['run_dir'] = os.path.join(param_dir, run_name) + options['param_dir'] = param_dir + options['run_dir'] = os.path.join(results_dir, run_name) _do_analysis(options) log.info('Finished run %s' % run_name) log.info('Finished analysis successfully') @@ -110,7 +110,7 @@ def _function_location(options): elif func_name in mod_funcs: module = 'mod' else: - raise ValueError, ("No analysis of type '%s' is available" % + raise ValueError, ("No analysis of type '%s' is available" % options['analysis']) return module @@ -127,10 +127,10 @@ def _call_analysis_function(options, module): Returns ------- tuple or list of tuples - First element of the tuple gives a string describing the result and the - second element giving the result of the analysis as a dataframe. 
- Functions in the empirical module return a list of tuples, where each - tuple corresponds to a split. All other functions return a single + First element of the tuple gives a string describing the result and the + second element giving the result of the analysis as a dataframe. + Functions in the empirical module return a list of tuples, where each + tuple corresponds to a split. All other functions return a single tuple. """ @@ -151,7 +151,7 @@ def _get_args_kwargs(options, module): # Create list of values for arg_names args = [] for arg_name in arg_names: - + if arg_name == 'patch': # For patch arg, append actual patch obj args.append(options['patch']) continue @@ -159,7 +159,7 @@ def _get_args_kwargs(options, module): continue if arg_name == 'k': # scipy dists use k and x, we always use x arg_name = 'x' - + try: exec 'args.append(eval("%s"))' % options[arg_name] except SyntaxError: # eval failing because option is a string @@ -177,7 +177,7 @@ def _get_args_kwargs(options, module): except SyntaxError: # eval failing because value is a string kwargs[kw_name] = options[kw_name] except: - raise ValueError, ("Value for optional argument %s is invalid" + raise ValueError, ("Value for optional argument %s is invalid" % kw_name) return args, kwargs @@ -188,10 +188,10 @@ def _emp_extra_options(options): Get special options patch, cols, and splits if analysis in emp module """ - metadata_path = os.path.normpath(os.path.join(options['param_dir'], + metadata_path = os.path.normpath(os.path.join(options['param_dir'], options['metadata'])) if not os.path.isfile(metadata_path): - raise IOError, ("Path to metadata file %s is invalid." % + raise IOError, ("Path to metadata file %s is invalid." % metadata_path) options['patch'] = emp.Patch(metadata_path) @@ -204,7 +204,7 @@ def _get_cols_splits(options): """ Notes ----- - Always returns strings, even if dictionary or list is constructed here, to + Always returns strings, even if dictionary or list is constructed here, to ensure consistency with provided options. """ @@ -218,7 +218,7 @@ def _get_cols_splits(options): else: for col in special_cols: cols[col] = options.get(col, None) - + # If col is still None, try to fall back to metadata for col in special_cols: if cols[col] is None: @@ -275,15 +275,15 @@ def _fit_models(options, core_results): Returns ------- list of dicts - Each element in list corresponds to a split. The dict has a key for - each model given in options, and the value is a list of fitted - parameters (tuple), values (array), comparison statistic names (list), + Each element in list corresponds to a split. The dict has a key for + each model given in options, and the value is a list of fitted + parameters (tuple), values (array), comparison statistic names (list), and comparison statistic values (list). Notes ----- - To determine if the empirical result refers to a curve or a distribution, - the result dataframe is inspected for a column 'x', which indicates a + To determine if the empirical result refers to a curve or a distribution, + the result dataframe is inspected for a column 'x', which indicates a curve. 
""" @@ -318,7 +318,7 @@ def _get_values(data, model, fits): values = eval("mod.%s.pmf(data, *fits)" % model) except: pass - + return values def _get_comparison_statistic(data, fits): @@ -379,12 +379,12 @@ def _write_core_tables(options, module, core_results): """ Notes ----- - Depending on function that was called for analysis, core_results may be a + Depending on function that was called for analysis, core_results may be a list of tuples (empirical), a dataframe, an array, or a single value. - For the list of tuples from empirical, the second element of each tuple is - the raw result, and we write them all with the appropriate prefix. For - dataframes, we write them. For arrays or single values, we convert to data + For the list of tuples from empirical, the second element of each tuple is + the raw result, and we write them all with the appropriate prefix. For + dataframes, we write them. For arrays or single values, we convert to data frames and write them. """ @@ -440,12 +440,12 @@ def _write_test_statistics(spid, models, options, fit_results): f.close() -def _write_comparison_plots_tables(spid, models, options, core_results, +def _write_comparison_plots_tables(spid, models, options, core_results, fit_results): """ Notes ----- - Only applies to analysis using functions from empirical in which models are + Only applies to analysis using functions from empirical in which models are also given. - pdf/pmf vs histogram @@ -467,7 +467,7 @@ def calc_func(model, df, shapes): plot_exec_str="ax.scatter(df['x'], emp, color='k');ax.set_yscale('log')" - _save_table_and_plot(spid, models, options, fit_results, 'data_pred_rad', + _save_table_and_plot(spid, models, options, fit_results, 'data_pred_rad', df, calc_func, plot_exec_str) # CDF @@ -480,12 +480,12 @@ def calc_func(model, df, shapes): plot_exec_str = "ax.step(df['x'], emp, color='k', lw=3);ax.set_ylim(top=1)" - _save_table_and_plot(spid, models, options, fit_results, 'data_pred_cdf', + _save_table_and_plot(spid, models, options, fit_results, 'data_pred_cdf', df, calc_func, plot_exec_str) # PDF/PMF hist_bins = 11 - emp_hist, edges = np.histogram(core_result['y'].values, hist_bins, + emp_hist, edges = np.histogram(core_result['y'].values, hist_bins, normed=True) x = (np.array(edges[:-1]) + np.array(edges[1:])) / 2 df = pd.DataFrame({'x': x, 'empirical': emp_hist}) @@ -494,15 +494,15 @@ def calc_func(model, df, shapes): try: return eval("mod.%s.pmf(np.floor(df['x']), *shapes)" % model) except: - return eval("%s.pdf(df['x'], *shapes)" % model) + return eval("mod.%s.pdf(df['x'], *shapes)" % model) plot_exec_str = "ax.bar(df['x']-width/2, emp, width=width, color='gray')" - _save_table_and_plot(spid, models, options, fit_results, 'data_pred_pdf', + _save_table_and_plot(spid, models, options, fit_results, 'data_pred_pdf', df, calc_func, plot_exec_str) -def _save_table_and_plot(spid, models, options, fit_results, name, df, +def _save_table_and_plot(spid, models, options, fit_results, name, df, calc_func, plot_exec_str): f_path = _get_file_path(spid, options, '%s.csv' % name) @@ -549,6 +549,6 @@ def _pad_plot_frame(ax, pad=0.01): return ax if __name__ == '__main__': - # To execute, run `python -m macroeco.main.main path/to/parameters.txt from + # To execute, run `python -m macroeco.main.main path/to/parameters.txt from # the root macroeco directory. 
main(sys.argv[1]) From 336a6dde9658264986f7fa0b09b3913c36881794 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Mon, 24 Mar 2014 12:05:04 -0700 Subject: [PATCH 122/343] Move all results to result subdir --- macroeco/main/main.py | 14 ++++++++++---- macroeco/misc/misc.py | 4 +--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index c331de1..8215b1f 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -50,13 +50,19 @@ def main(param_path='parameters.txt'): """ - # Confirm file is present and extract dir + # Confirm parameters file is present and extract dir if not os.path.isfile(param_path): raise IOError, "Parameter file not found at %s" % param_path - param_dir = os.path.dirname(param_path) - + param_dir = os.path.abspath(os.path.dirname(param_path)) + + # Setup results_dir + results_dir = os.path.join(param_dir, 'results') + if os.path.isdir(results_dir): + shutil.rmtree(results_dir) + os.makedirs(results_dir) + # Get logger and announce start - log = setup_log(param_dir, clear=True) + log = setup_log(results_dir, clear=True) log.info('Starting analysis') # Read parameter file into params object diff --git a/macroeco/misc/misc.py b/macroeco/misc/misc.py index ee598a2..f4dc164 100644 --- a/macroeco/misc/misc.py +++ b/macroeco/misc/misc.py @@ -19,9 +19,7 @@ def setup_log(log_dir, clear=False): """ # Get path to log file and clear if requested - log_path = os.path.join(log_dir,'log.txt') - if clear and os.path.isfile(log_path): - os.remove(log_path) + log_path = os.path.join(log_dir,'_log.txt') # Get outputs and add emitters file_output, std_output = _logger_outputs(log_path) From 1b570627679477c3717aa7cae6ca71ba998069aa Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Mon, 24 Mar 2014 12:29:01 -0700 Subject: [PATCH 123/343] Logger not used in desktop.py --- macroeco/desktop.py | 1 - 1 file changed, 1 deletion(-) diff --git a/macroeco/desktop.py b/macroeco/desktop.py index ed19de7..ee2b3a6 100755 --- a/macroeco/desktop.py +++ b/macroeco/desktop.py @@ -16,7 +16,6 @@ import threading as thread import main -from misc import get_log class RedirectText(object): def __init__(self,aWxTextCtrl): From fd97a616d88be17139feac011a530b46259ae924 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Mon, 24 Mar 2014 13:34:27 -0700 Subject: [PATCH 124/343] Wrap shapely in try, since hard to install --- macroeco/empirical/empirical.py | 107 ++++++++++++++++---------------- 1 file changed, 55 insertions(+), 52 deletions(-) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index 1bba807..e1d5395 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -11,7 +11,10 @@ import itertools from copy import deepcopy import scipy.spatial.distance as dist -import shapely.geometry as geo +try: + import shapely.geometry as geo +except: + pass # TODO: Make shapely import work with pyinstaller from ..misc import doc_sub, log_start_end @@ -20,21 +23,21 @@ """patch : Patch obj Patch object containing data for analysis cols : dict - Indicates which column names in patch data table are associated with + Indicates which column names in patch data table are associated with species identifiers, counts, energy, and mass. See Notes. splits : str - If multiple analyses for subsets of patch data table are desired, + If multiple analyses for subsets of patch data table are desired, specifies how columns should be split. 
See Notes.""" metric_return = \ """list - List of tuples containing results, where the first element of each - tuple is a string indicating the split values used for that result and + List of tuples containing results, where the first element of each + tuple is a string indicating the split values used for that result and second element is a dataframe giving the result.""" cols_note = \ """The parameter `cols` is a dictionary with keys for four special - columns and values giving the column name in the patch data table + columns and values giving the column name in the patch data table associated with each special column. - spp_col - Unique species identifiers @@ -42,11 +45,11 @@ - energy_col - Energy of individuals - mass_cal - Mass of individuals - Only spp_col is always mandatory. Note that the value of spp_col may be - set to a columm in the data table giving the genus, family, functional - group, etc., which allows for analysis of this metric by those groups. + Only spp_col is always mandatory. Note that the value of spp_col may be + set to a columm in the data table giving the genus, family, functional + group, etc., which allows for analysis of this metric by those groups. count_col is used when multiple individuals of a species may be found at - a single recorded location, as is the case in gridded censuses where all + a single recorded location, as is the case in gridded censuses where all individuals in a quadrat are "assigned" to a single point. energy_col and mass_col are used for energy-based metrics. """ @@ -54,14 +57,14 @@ splits_note = \ """The parameter `splits` is a semicolon-separated string in the form of "column: value", where column is a name of a column in the patch data - table and value is either (a) an integer giving the number of + table and value is either (a) an integer giving the number of equally-spaced divisions of a column, or (b) the special keyword 'split', which evaluates all unique levels of a column. - For example, presume a data table has columns for x and y spatial - coordinates and a column for year, of which there are three. The string - "x:2; y:2; year:split" will perform the analysis separately for each of - four subplots of the patch (created by dividing the x and y coordinates + For example, presume a data table has columns for x and y spatial + coordinates and a column for year, of which there are three. The string + "x:2; y:2; year:split" will perform the analysis separately for each of + four subplots of the patch (created by dividing the x and y coordinates each into two equally sized divisions) within each of the three years, for a total of 12 separate analyses.""" @@ -82,28 +85,28 @@ class Patch(object): table : dataframe Table of census data recorded in patch meta : ConfigParser obj - Object similar to dict describing data table, loaded from metadata file + Object similar to dict describing data table, loaded from metadata file at metadata_path subset : str Subset string passed as parameter Notes ----- - The table file described by the metadata must contain column names - consisting only of letters and numbers, with no spaces or other special + The table file described by the metadata must contain column names + consisting only of letters and numbers, with no spaces or other special characters. - The parameter subset takes different forms depending on whether the data + The parameter subset takes different forms depending on whether the data file described by the metadata is a csv or a sql/db file. 
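    A minimal usage sketch (the metadata path, column names, and split string
    are hypothetical):

        pat = Patch('data/metadata.txt', subset="year==2005; x>20; x<40")
        results = sad(pat, cols={'spp_col': 'spp', 'count_col': 'count'},
                      splits='year:split')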
- - For csv data files, subset is a semicolon-separated string describing - subset operations. For example, the string "year==2005; x>20; x<40; - spp=='cabr'" loads a data table containing only records for which the year - is 2005, x values are between 20 and 40, and species 'cabr'. Note that for - categorical columns, the value of the column must be enclosed in single + + For csv data files, subset is a semicolon-separated string describing + subset operations. For example, the string "year==2005; x>20; x<40; + spp=='cabr'" loads a data table containing only records for which the year + is 2005, x values are between 20 and 40, and species 'cabr'. Note that for + categorical columns, the value of the column must be enclosed in single quotes. - For sql/db files, subset is a SQL query string that selects the data from + For sql/db files, subset is a SQL query string that selects the data from the data file. """ @@ -113,8 +116,8 @@ def __init__(self, metadata_path, subset=''): self.meta = ConfigParser() self.meta.read(metadata_path) self.subset = subset - self.table = self._load_table(metadata_path, - self.meta['Description']['datapath'], + self.table = self._load_table(metadata_path, + self.meta['Description']['datapath'], subset) @@ -139,7 +142,7 @@ def _load_table(self, metadata_path, relative_data_path, subset): """ metadata_dir = os.path.dirname(metadata_path) - data_path = os.path.normpath(os.path.join(metadata_dir, + data_path = os.path.normpath(os.path.join(metadata_dir, relative_data_path)) type = data_path.split('.')[-1] @@ -156,19 +159,19 @@ def _load_table(self, metadata_path, relative_data_path, subset): def _get_db_table(self, data_path, type): """ Query a database and return query result as a recarray - + Parameters ---------- data_path : str Path to the database file type : str Type of database, either sql or db - + Returns ------- table : recarray The database query as a recarray - + """ # TODO: This is probably broken @@ -182,12 +185,12 @@ def _get_db_table(self, data_path, type): sql = f.read() cur.executescript(sql) - + else: con = lite.connect(data_path) con.row_factory = lite.Row cur = con.cursor() - + cur.execute(self.subset) # Check that table is not empty @@ -195,12 +198,12 @@ def _get_db_table(self, data_path, type): try: col_names = db_info[0].keys() except IndexError: - raise lite.OperationalError("Query %s to database %s is empty" % + raise lite.OperationalError("Query %s to database %s is empty" % (query_str, data_path)) # Convert objects to tuples converted_info = [tuple(x) for x in db_info] - + # NOTE: Using default value for Unicode: Seems better than checking # lengths. Should we keep the type as unicode? dtypes=[type(x) if type(x) != unicode else 'S150' for x in db_info[0]] @@ -208,7 +211,7 @@ def _get_db_table(self, data_path, type): table = np.array(converted_info, dtype=zip(col_names, dtypes)) con.commit() con.close() - + # Return a recarray for consistency # TODO: This should now be a pd.dataframe return table.view(np.recarray) @@ -233,7 +236,7 @@ def _subset_table(full_table, subset): """ if not subset: return full_table - + conditions = subset.split(';') valid = np.ones(len(full_table), dtype=bool) @@ -253,8 +256,8 @@ def sad(patch, cols, splits='', clean=True): ---------- {0} clean : bool - If True, all species with zero abundance are removed from SAD results - (relevant if splits is used and some splits are missing species). 
+ If True, all species with zero abundance are removed from SAD results + (relevant if splits is used and some splits are missing species). Default False. Returns @@ -1008,23 +1011,23 @@ def ased(self, criteria, normalize=True, exponent=0.75): def tsed(self, criteria, normalize=True, exponent=0.75): ''' Calculates the total species energy distribution for each given - species in a subset. - + species in a subset. + Parameters ---------- criteria : dict Dictionary must have contain a key with the value 'energy' or 'mass'. See sad method for further requirements. - + Returns ------- - result : list + result : list List of tuples containing results, where the first element is a - dictionary of criteria for this calculation and second element is a - 1D ndarray of length species containing the average energy for each - species. The third element is 1D array listing identifiers for - species in the same order as they appear in the second element of - result. + dictionary of criteria for this calculation and second element is a + 1D ndarray of length species containing the average energy for each + species. The third element is 1D array listing identifiers for + species in the same order as they appear in the second element of + result. ''' @@ -1040,7 +1043,7 @@ def tsed(self, criteria, normalize=True, exponent=0.75): len(this_sed[1][spp]) != 0] # Truncated spp_list if necessary spp_list = [spp for spp in spp_list if len(this_sed[1][spp]) != 0] - + result.append((this_sed[0], np.array(omega), np.array(spp_list))) return result @@ -1134,7 +1137,7 @@ def _get_cols(special_cols_names, cols, patch): if col_value is None: col_value = patch.meta['Description'].get(col, None) special_cols_values.append(col_value) - + return tuple(special_cols_values) @@ -1194,7 +1197,7 @@ def _parse_splits(patch, splits): """ split_list = splits.split(';') # Split commands for each col separate - subset_list = [] # List of all subset strings + subset_list = [] # List of all subset strings for split in split_list: col, val = split.split(':') From 1670d5c7b922777a149d72706978de92c1e30d55 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Mon, 24 Mar 2014 13:53:55 -0700 Subject: [PATCH 125/343] Add decorator to requirements for RTD --- doc/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/requirements.txt b/doc/requirements.txt index ee0488c..4a38cd3 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -3,6 +3,7 @@ numpy matplotlib pandas configparser +decorator twiggy numpydoc macroeco From 30a9e9e6774d33f7cd4c9f492f861e481b4271a5 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Mon, 24 Mar 2014 15:27:05 -0700 Subject: [PATCH 126/343] Even spacing in split logging --- macroeco/empirical/empirical.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index e1d5395..5cba62d 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -1211,13 +1211,13 @@ def _parse_splits(patch, splits): step = (col_max - col_min) / eval(val) starts = np.arange(col_min, col_max, step) ends = starts + step - level_list = [col + '>=' + str(x) + '; ' + col + '<' + str(y)+';' + level_list = [col + '>=' + str(x) + '; ' + col + '<' + str(y)+'; ' for x, y in zip(starts, ends)] subset_list.append(level_list) # Get product of all string levels as list, conv to string, drop final ; - return [''.join(x)[:-1] for x in _product(*subset_list)] + return [''.join(x)[:-2] for x in _product(*subset_list)] def 
_product(*args, **kwds): From f3dc3eae49162578245a9903d47fe31eae113ba8 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Mon, 24 Mar 2014 15:27:24 -0700 Subject: [PATCH 127/343] Correctly handle spaces in split and subset strings --- macroeco/empirical/empirical.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index 5cba62d..ef4b073 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -237,7 +237,7 @@ def _subset_table(full_table, subset): if not subset: return full_table - conditions = subset.split(';') + conditions = subset.replace(' ','').split(';') valid = np.ones(len(full_table), dtype=bool) for condition in conditions: @@ -1196,7 +1196,7 @@ def _parse_splits(patch, splits): """ - split_list = splits.split(';') # Split commands for each col separate + split_list = splits.replace(' ','').split(';') subset_list = [] # List of all subset strings for split in split_list: From 5f22ca51b23bd47d38aad9b25c61adf1fd4380c0 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Mon, 24 Mar 2014 15:34:25 -0700 Subject: [PATCH 128/343] Make desktop.py sole entry point to main --- macroeco/desktop.py | 13 +++++++++---- macroeco/main/main.py | 5 ----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/macroeco/desktop.py b/macroeco/desktop.py index ee2b3a6..2cf7344 100755 --- a/macroeco/desktop.py +++ b/macroeco/desktop.py @@ -134,7 +134,12 @@ def OnIdle(self, event): self.run_button.Enable(True) # Turn the run button on if __name__ == '__main__': - # To execute, run `pythonw -m macroeco.desktop from root macroeco dir. - app = wx.App(False) - frame = MainWindow(None, 'Macroeco Desktop') - app.MainLoop() + # To execute, run `pythonw -m macroeco.desktop path/to/parameters.txt`. + # With arg, execute main.main(arg), without arg open GUI window + if len(sys.argv) > 1: + param_path = sys.argv[1] + main.main(param_path) + else: + app = wx.App(False) + frame = MainWindow(None, 'Macroeco Desktop') + app.MainLoop() diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 8215b1f..8c8b13d 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -553,8 +553,3 @@ def _pad_plot_frame(ax, pad=0.01): ax.set_ylim(ymin - yrange*pad, ymax + yrange*pad) return ax - -if __name__ == '__main__': - # To execute, run `python -m macroeco.main.main path/to/parameters.txt from - # the root macroeco directory. - main(sys.argv[1]) From 1c0b02a70b838cfc463e28ac8847fd8cc145772d Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Mon, 24 Mar 2014 17:26:00 -0700 Subject: [PATCH 129/343] Add main module to doc index --- doc/index.rst | 3 ++- doc/main.rst | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 doc/main.rst diff --git a/doc/index.rst b/doc/index.rst index 35ad3ca..67abb25 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,6 +1,6 @@ .. macroeco documentation master file, created by sphinx-quickstart on Sun Feb 16 21:19:54 2014. - You can adapt this file completely to your liking, but it should at least + You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. Macroeco: Ecological pattern analysis in Python @@ -14,6 +14,7 @@ Welcome to macroeco. empirical models compare + main Indices and tables diff --git a/doc/main.rst b/doc/main.rst new file mode 100644 index 0000000..8a345b8 --- /dev/null +++ b/doc/main.rst @@ -0,0 +1 @@ +.. 
automodule:: macroeco.main From 4122497f6124fc20f6b02e5fa34eed2236e57dbc Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Mon, 24 Mar 2014 18:46:52 -0700 Subject: [PATCH 130/343] Move main doctoring to __init__ --- macroeco/main/__init__.py | 16 ++++++++++++++++ macroeco/main/main.py | 16 ---------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/macroeco/main/__init__.py b/macroeco/main/__init__.py index c28a133..4019aaa 100644 --- a/macroeco/main/__init__.py +++ b/macroeco/main/__init__.py @@ -1 +1,17 @@ +""" +=========================== +Main (:mod:`macroeco.main`) +=========================== + +This module contains functions that execute macroecological analyses specified +by user-generated `parameters.txt` configuration files. Instructions for +creating parameter files can be found here. + +.. autosummary:: + :toctree: generated/ + + main + +""" + from .main import main diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 8c8b13d..373995d 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -1,19 +1,3 @@ -""" -=========================== -Main (:mod:`macroeco.main`) -=========================== - -This module contains functions that execute macroecological analyses specified -by user-generated `parameters.txt` configuration files. Instructions for -creating parameter files can be found here. - -.. autosummary:: - :toctree: generated/ - - main - -""" - from __future__ import division import sys import os From 58a327f4ead4edfea15d47b3600c9d046ad553d0 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Mon, 24 Mar 2014 18:47:25 -0700 Subject: [PATCH 131/343] Add filename option to setup_log --- macroeco/misc/misc.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/macroeco/misc/misc.py b/macroeco/misc/misc.py index f4dc164..efb0d8d 100644 --- a/macroeco/misc/misc.py +++ b/macroeco/misc/misc.py @@ -13,17 +13,19 @@ import decorator import time -def setup_log(log_dir, clear=False): +def setup_log(log_dir, file_name='_log.txt', clear=False): """ Set up and return logger object """ # Get path to log file and clear if requested - log_path = os.path.join(log_dir,'_log.txt') - + log_path = os.path.join(log_dir, file_name) + if clear and os.path.isfile(log_path): + os.remove(log_path) + # Get outputs and add emitters file_output, std_output = _logger_outputs(log_path) - twiggy.addEmitters(('file', twiggy.levels.DEBUG, None, file_output), + twiggy.addEmitters(('file', twiggy.levels.DEBUG, None, file_output), ('stdout', twiggy.levels.INFO, None, std_output)) # Get logger @@ -51,7 +53,7 @@ def __call__(self, msg): print "{text}".format(**locals()) return "" std_format = stdLineFormat(traceback_prefix='') - + # file_format - customized to show local time, etc conversion = twiggy.lib.converter.ConversionTable() conversion.add("time", _logger_better_time, "[{1}]".format) @@ -123,12 +125,12 @@ def _doc(func): def doc_sub(*sub): """ Decorator for performing substitutions in docstrings. - - Using @doc_sub(some_note, other_note) on a function with {0} and {1} in the - docstring will substitute the contents of some_note and other_note for {0} + + Using @doc_sub(some_note, other_note) on a function with {0} and {1} in the + docstring will substitute the contents of some_note and other_note for {0} and {1}, respectively. - Decorator appears to work properly both with IPython help (tab completion + Decorator appears to work properly both with IPython help (tab completion and ?) and with Sphinx. 
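    A minimal sketch of the intended pattern (the note and function below are
    made up for illustration):

        note = "Some shared documentation text."

        @doc_sub(note)
        def example_func(x):
            '''Process x. {0}'''

    After decoration, example_func.__doc__ reads "Process x. Some shared
    documentation text."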
""" @@ -141,7 +143,7 @@ def log_start_end(f): """ Decorator to log start and end of function - Use of decorator module here ensures that argspec will inspect wrapped + Use of decorator module here ensures that argspec will inspect wrapped function, not the decorator itself. http://micheles.googlecode.com/hg/decorator/documentation.html """ From 92c088b0c57cb5e24f12d9bb3c3f9487e3fd4fa9 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Mon, 24 Mar 2014 18:49:04 -0700 Subject: [PATCH 132/343] Minor cleanups in desktop --- macroeco/desktop.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/macroeco/desktop.py b/macroeco/desktop.py index 2cf7344..4642c98 100755 --- a/macroeco/desktop.py +++ b/macroeco/desktop.py @@ -30,8 +30,8 @@ class MainWindow(wx.Frame): def __init__(self, parent, title): wx.Frame.__init__(self, parent, title=title) self.t = None - self.dirname = '.' - self.parampath = 'parameters.txt' + self.dirname = '' + self.parampath = '' self.InitUI() self.Show(True) @@ -77,19 +77,18 @@ def InitUI(self): sizer2.Add(self.run_button, 0, wx.EXPAND) # Updating process - self.process = None self.Bind(wx.EVT_BUTTON, self.OnRun, self.run_button) # Output window sizerlogbox = wx.BoxSizer(wx.HORIZONTAL) - self.logbox = wx.TextCtrl(self, wx.ID_ANY, size=(400,400), + self.logbox = wx.TextCtrl(self, wx.ID_ANY, size=(500,400), style = wx.TE_MULTILINE|wx.TE_READONLY|wx.HSCROLL) sizerlogbox.Add(self.logbox, 1, wx.EXPAND) - # redirect text here - redir=RedirectText(self.logbox) - sys.stdout=redir - sys.stderr=redir + # Redirect text here + redir = RedirectText(self.logbox) + sys.stdout = redir + sys.stderr = redir # Restore run button self.Bind(wx.EVT_IDLE, self.OnIdle) From c28870b9af211105289dfc91d808475f4d38e760 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Mon, 24 Mar 2014 18:49:30 -0700 Subject: [PATCH 133/343] Cleanups in main --- macroeco/main/main.py | 88 +++++++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 40 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 373995d..1894530 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -29,8 +29,8 @@ def main(param_path='parameters.txt'): Parameters ---------- - param_dir : str - Path to directory containing user-generated parameter file + param_path : str + Path to user-generated parameter file """ @@ -39,14 +39,14 @@ def main(param_path='parameters.txt'): raise IOError, "Parameter file not found at %s" % param_path param_dir = os.path.abspath(os.path.dirname(param_path)) - # Setup results_dir + # Setup results_dir, remove if present results_dir = os.path.join(param_dir, 'results') if os.path.isdir(results_dir): shutil.rmtree(results_dir) os.makedirs(results_dir) # Get logger and announce start - log = setup_log(results_dir, clear=True) + log = setup_log(results_dir) log.info('Starting analysis') # Read parameter file into params object @@ -91,13 +91,15 @@ def _do_analysis(options): def _function_location(options): - # TODO: Add check for spec module + # TODO: Add spec and misc modules + # This relies on the assumption that there are no duplicate member names + # in the different modules. 
func_name = options['analysis'].split('.')[0] # Ignore method if present - emp_funcs = [x[0] for x in inspect.getmembers(emp)] - mod_funcs = [x[0] for x in inspect.getmembers(mod)] - if func_name in emp_funcs: + emp_members = [x[0] for x in inspect.getmembers(emp)] + mod_members = [x[0] for x in inspect.getmembers(mod)] + if func_name in emp_members: module = 'emp' - elif func_name in mod_funcs: + elif func_name in mod_members: module = 'mod' else: raise ValueError, ("No analysis of type '%s' is available" % @@ -107,21 +109,22 @@ def _function_location(options): def _call_analysis_function(options, module): """ - Call function and get return, using inputs from options + Call function from module and get result, using inputs from options Parameters ---------- options : dict Option names and values for analysis + module : str + Short name of module within macroeco containing analysis function Returns ------- - tuple or list of tuples - First element of the tuple gives a string describing the result and the + dataframe, array, value, list of tuples + Functions from emp module return a list of tuples in which first + element of the tuple gives a string describing the result and the second element giving the result of the analysis as a dataframe. - Functions in the empirical module return a list of tuples, where each - tuple corresponds to a split. All other functions return a single - tuple. + Functions in other modules return dataframe, array, or value. """ @@ -131,7 +134,7 @@ def _call_analysis_function(options, module): def _get_args_kwargs(options, module): """ - Given an analysis, options, and a module, extract args and kwargs + Given an options (including analysis), and module, extract args and kwargs """ if module == 'emp': @@ -240,7 +243,7 @@ def _arg_kwarg_lists(options, module): kw_names = [] # Inspection for rv classes doesn't work since it uses args internally - # Unless method is translate_args or fit2, appens shapes to args + # Unless method is translate_args or fit2, appends shapes to args try: obj_meth = options['analysis'].split('.') if obj_meth[1] not in ['fit2', 'translate_args']: @@ -282,18 +285,19 @@ def _fit_models(options, core_results): # TODO: Make work for 2D results, i.e., curves, comm_sep, o_ring # TODO: Make work for curves in general (check if 'x' present in core_res) - extra_results = [] + fit_results = [] for core_result in core_results: # Each split - extra_result = {} + fit_result = {} for model in models: data = core_result[1]['y'].values fits = _get_fits(data, model) + # TODO: values is probably better moved to output part values = _get_values(data, model, fits) stat_names, stats = _get_comparison_statistic(values, fits) - extra_result[model] = [fits, values, stat_names, stats] - extra_results.append(extra_result) + fit_result[model] = [fits, values, stat_names, stats] + fit_results.append(fit_result) - return extra_results + return fit_results def _get_fits(data, model): @@ -325,17 +329,16 @@ def _save_results(options, module, core_results, fit_results): Option names and values for analysis module : str Module that contained function used to generate core_results - core_results : list, dataframe, or array + core_results : dataframe, array, value, list of tuples Results of main analysis - fit_results : list + fit_results : list or None Results of comparing emp analysis to models, None if not applicable """ log.info("Saving all results") - # Ensure that output dir for this run exists and is empty - shutil.rmtree(options['run_dir'], ignore_errors=True) + # 
Make run directory os.makedirs(options['run_dir']) # Write core results @@ -354,17 +357,6 @@ def _save_results(options, module, core_results, fit_results): core_results, fit_results) -def _write_split_index_file(options, core_results): - """ - Write table giving index of splits, giving number and combination - """ - - f_path = os.path.join(options['run_dir'], '_split_index.csv') - with open(f_path, 'a') as f: - for i, core_result in enumerate(core_results): - f.write("%i,%s\n" % (i+1, str(core_result[0]))) - - def _write_core_tables(options, module, core_results): """ Notes @@ -400,8 +392,21 @@ def _get_file_path(spid, options, file_name): '%i_%s' % (spid+1, file_name)) -def _write_fitted_params(spid, models, options, fit_results): +def _write_split_index_file(options, core_results): + """ + Write table giving index of splits, giving number and combination + """ + f_path = os.path.join(options['run_dir'], '_split_index.csv') + split_strs = zip(*core_results)[0] + index = np.arange(len(split_strs)) + 1 + df = pd.DataFrame({'splits': split_strs}, index=index) + df.to_csv(f_path) + + +def _write_fitted_params(spid, models, options, fit_results): + # TODO: Consider converting to pandas, need to deal with variable length + # TODO: Possibility - empty data frame max length, max width = nparams f = open(_get_file_path(spid, options, 'fitted_params.csv'), 'w') f.write("Model, Fit Parameters\n") @@ -414,14 +419,14 @@ def _write_fitted_params(spid, models, options, fit_results): def _write_test_statistics(spid, models, options, fit_results): # TODO: Add delta test statistics columns - + # TODO: Make dataframe? f = open(_get_file_path(spid, options, 'test_statistics.csv'), 'w') # Gets stat name list from any element of result dict - same for all stat_names_list = next(fit_results[spid].itervalues())[2] stat_names_str = str(stat_names_list)[1:-1].strip("'") - f.write("Theory, %s\n" % stat_names_str) + f.write("Model, %s\n" % stat_names_str) for model in models: fit_result = fit_results[spid][model] @@ -442,6 +447,7 @@ def _write_comparison_plots_tables(spid, models, options, core_results, - cdf vs emp cdf - rad vs rad """ + # TODO: More general function for RAD that deals with -0.5/len issue core_result = core_results[spid][1] n_vals = len(core_result) @@ -506,6 +512,8 @@ def _save_table_and_plot(spid, models, options, fit_results, name, df, df.to_csv(f_path, index=False, float_format='%.4f') # Table + # TODO: We only want x and models here, not any other cols that might be + # returned in the empirical calculation. 
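    # The df received here is assumed to hold an 'x' column, an 'empirical'
    # column, and one column per fitted model; everything except 'empirical'
    # is plotted against the empirical values below.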
df_plt = df.set_index('x') # Figure emp = df_plt['empirical'] df_plt = df_plt.drop('empirical',1) From b3ebe42f03519407b5cf79fd0249623d7b1c1fac Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 25 Mar 2014 09:59:56 -0700 Subject: [PATCH 134/343] Fix log_start_end to also use kwargs --- macroeco/misc/misc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macroeco/misc/misc.py b/macroeco/misc/misc.py index efb0d8d..e4a5bab 100644 --- a/macroeco/misc/misc.py +++ b/macroeco/misc/misc.py @@ -149,7 +149,7 @@ def log_start_end(f): """ def inner(f, *args, **kwargs): log.info('Starting %s' % f.__name__) - res = f(*args) + res = f(*args, **kwargs) log.info('Finished %s' % f.__name__) return res return decorator.decorator(inner, f) From eba665c75fc3ad14ac2abf82cc36bb4e06385b4e Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 25 Mar 2014 12:01:38 -0700 Subject: [PATCH 135/343] Code review of models, incl change name of fit2 --- macroeco/models/models.py | 414 ++++++++++++++++----------------- macroeco/models/test_models.py | 32 +-- macroeco/test_distributions.py | 116 ++++----- 3 files changed, 269 insertions(+), 293 deletions(-) diff --git a/macroeco/models/models.py b/macroeco/models/models.py index dd406c4..5f03d1e 100644 --- a/macroeco/models/models.py +++ b/macroeco/models/models.py @@ -4,35 +4,13 @@ import numpy as np import numpy.random as nprand -from scipy.stats.distributions import (rv_discrete, rv_continuous, docdict, +from scipy.stats.distributions import (rv_discrete, rv_continuous, docdict, docdict_discrete) import scipy.stats.distributions as spdist import scipy.optimize as optim import scipy.special as special -from ..misc import inherit_docstring_from - -_doc_default_callparams = \ -""" -Parameters ----------- -x : array_like - quantiles -q : array_like - lower or upper tail probability -%(shapes)s : array_like - shape parameters -loc : array_like, optional - location parameter (default=0) -scale : array_like, optional - scale parameter (default=1) -size : int or tuple of ints, optional - shape of random variates (default computed from input arguments ) -moments : str, optional - composed of letters ['mvsk'] specifying which moments to compute where - 'm' = mean, 'v' = variance, 's' = (Fisher's) skew and - 'k' = (Fisher's) kurtosis. (default='mv') -""" +from ..misc import doc_sub, inherit_docstring_from # Remove header from all methods @@ -42,146 +20,170 @@ # **kwds in expect string followed by no space was throwing warning _docdict_allmeth = _docdict_allmeth.replace(', **kwds','') +# Additional docstrings for custom methods +_docdict_rank_method = \ +"""rank(n, %(shapes)s) + Predicted rank abundance distribution. +""" + +_docdict_extra_params = \ +"""n : int + number of values +data : array_like + values used to fit distribution +""" + # Create docstring helpers -docdict['before_notes'] = ''.join([_docdict_allmeth,docdict['callparams']]) +docdict['before_notes'] = ''.join([_docdict_rank_method, + _docdict_allmeth, + docdict['callparams'], + _docdict_extra_params]) + +docdict_discrete['before_notes'] = ''.join([_docdict_rank_method, + _docdict_discrete_allmeth, + docdict['callparams'], + _docdict_extra_params]) + +_doc_translate_args = \ +""" +Translates user-friendly arguments into shape parameters + +See distribution docstring for description of user arguments and shape +parameters. 
+ +Parameters +---------- +uargs : floats + User argument(s), usually easily measured and specified + +Returns +------- +tuple of floats + Shape parameter(s) of distribution + +Notes +----- +""" + +_doc_fit_mle = \ +""" +Return MLEs for shape parameters from data + +Parameters +---------- +data : array_like + Data to use in calculating the MLEs. +args : floats + Starting value(s) for shape parameters. Some may be held constant + (see Notes). + +Returns +------- +tuple of floats + MLEs for shape parameters + +Notes +----- +""" + +_doc_rank = \ +""" +Return predicted rank abundance distribution + +Parameters +---------- +n : int + Number of values to return +%(shapes)s : array_like + shape parameters + +Returns +------- +array + Values of rank abundance distribution + +Notes +----- +Describe 0.5 offset. References. + +""" +# TODO: Finish doc_rank above -docdict_discrete['before_notes'] = ''.join([_docdict_discrete_allmeth, - docdict['callparams']]) class rv_continuous_meco(rv_continuous): """ A modified generic continuous random variable class meant for subclassing. - This class inherits from the `rv_continuous` class of `scipy.stats` and - contains all of its functionality. See the docstring of `rv_continuous` for - information on usage and subclassing. In addition, this class adds one new + This class inherits from the `rv_continuous` class of `scipy.stats` and + contains all of its functionality. See the docstring of `rv_continuous` for + information on usage and subclassing. In addition, this class adds two new methods. Methods ------- translate_args - takes user-friendly params as input and returns shape params - - fit2 - calls method `fit` with fixed loc=0 and scale=1 (defaults) + Shape parameters given user-friendly parameters (see notes) + fit_mle + Shape parameters given data and optional keyword arguments (see notes) + rank + Rank abundance distribution """ + @doc_sub(_doc_translate_args) def translate_args(self, *args): - """ - Translates user-friendly arguments into shape parameters - - See distribution docstring for description of user arguments and shape - parameters. - - Parameters - ---------- - uargs : floats - User argument(s), usually easily measured and specified - - Returns - ------- - tuple of floats - Shape parameter(s) of distribution - - Notes - ----- - """ - + """{0}""" raise NotImplementedError, ("translate_args method not implemented " "for this distribution") - - def fit2(self, *args): - """ - Return MLEs for shape parameters from data. - - Parameters - ---------- - data : array_like - Data to use in calculating the MLEs. - args : floats - Starting value(s) for shape parameters. Some may be held constant - (see Notes). - - Returns - ------- - tuple of floats - MLEs for shape parameters - - Notes - ----- - """ - + @doc_sub(_doc_fit_mle) + def fit_mle(self, *args): + """{0}""" return self.fit(*args, floc=0, fscale=1)[:-2] + @doc_sub(_doc_rank) + def rank(self, n, *args): + """{0}""" + return self.ppf((np.arange(1, n+1) - 0.5) / n, *args) + class rv_discrete_meco(rv_discrete): """ A modified generic discrete random variable class meant for subclassing. - This class inherits from the `rv_discrete` class of `scipy.stats` and - contains all of its functionality. See the docstring of `rv_discrete` for - information on usage and subclassing. In addition, this class adds two new + This class inherits from the `rv_discrete` class of `scipy.stats` and + contains all of its functionality. See the docstring of `rv_discrete` for + information on usage and subclassing. 
In addition, this class adds two new methods. Methods ------- translate_args - takes user-friendly params as input and returns shape params - fit2 - estimates distribution params from data + Shape parameters given user-friendly parameters (see notes) + fit_mle + Shape parameters given data and optional keyword arguments (see notes) + rank + Rank abundance distribution """ - def translate_args(self, *args): - """ - Translates user-friendly arguments into shape parameters - - See distribution docstring for description of user arguments and shape - parameters. - - Parameters - ---------- - uargs : floats - User argument(s), usually easily measured and specified - - Returns - ------- - tuple of floats - Shape parameter(s) of distribution - - Notes - ----- - """ + @doc_sub(_doc_translate_args) + def translate_args(self, *args): + """{0}""" raise NotImplementedError, ("translate_args method not implemented " "for this distribution") + @doc_sub(_doc_fit_mle) + def fit_mle(self, *args): + """{0}""" + raise NotImplementedError, ("fit_mle method not implemented " + "for this distribution") - def fit2(self, *args): - """ - Return MLEs for shape parameters from data. - - Parameters - ---------- - data : array_like - Data to use in calculating the MLEs. - args : floats - Subset of shape parameters that are not fit. See Notes. - - Returns - ------- - tuple of floats - MLEs for shape parameters - - Notes - ----- - """ - - raise NotImplementedError, ("fit method not implemented for this " - "distribution") - + @doc_sub(_doc_rank) + def rank(self, n, *args): + """{0}""" + return self.ppf((np.arange(1, n+1) - 0.5) / n, *args) # # Discrete @@ -191,29 +193,23 @@ class geom_gen(rv_discrete_meco): r""" A geometric discrete random variable. - This implementation of the geometric distribution differs from that in + This implementation of the geometric distribution differs from that in `scipy.stats`, as the distribution here has support from 0 to inf. .. math:: - \mathrm{pmf(x)} = (1-p)^{x} p + P(x) = (1-p)^{x} p for ``x >= 0``. The ``loc`` parameter is not used. - There are many available methods of ``geom``, each of which require one or - more of the parameters listed below. - Methods ------- translate_args(mu) - Get shape parameter p from distribution mean - fit2(data) - ML estimate of p from data - + Shape parameter p given distribution mean. + fit_mle(data) + ML estimate of shape parameter p given data. %(before_notes)s mu : float distribution mean - data : array_like - values used to fit distribution """ @@ -222,11 +218,8 @@ def translate_args(self, mu): return 1 / (np.array(mu) + 1) @inherit_docstring_from(rv_discrete_meco) - def fit2(self, data): - """%(super)s - Requires one argument containing data to fit. - """ - return self.translate_args(np.mean(data)), + def fit_mle(self, data): + return self.translate_args(np.mean(data)), def _argcheck(self, p): return (p <= 1) & (p >= 0) @@ -235,7 +228,7 @@ def _pmf(self, x, p): return (1-p)**x * p def _logpmf(self, x, p): - return k*np.log(1-p) + log(p) + return x*np.log(1-p) + log(p) def _cdf(self, x, p): x = np.floor(x) @@ -255,41 +248,34 @@ class geom_uptrunc_gen(rv_discrete_meco): .. math:: - \mathrm{pmf(x)} = \frac{(1-p)^{x} p}{1 - (1-p)^{b+1}} + P(x) = \frac{(1-p)^{x} p}{1 - (1-p)^{b+1}} - for ``x >= 0``. - - `geom_uptrunc` takes two shape parameters: ``p`` and ``b``, the upper - limit. The ``loc`` parameter is not used. - - There are many available methods of `geom_uptrunc`, each of which require - one or more of the parameters listed below. 
+ for ``x >= 0``. ``geom_uptrunc`` takes two shape parameters: ``p`` and + ``b``, the upper limit. The ``loc`` parameter is not used. Methods ------- translate_args(mu, b) - Get shape parameter p from distribution mean and upper limit - fit2(data, b=sum(data)) - ML estimate of p from data and upper limit (returns p, b) - + Shape parameter p given distribution mean and upper limit. + fit_mle(data, b=sum(data)) + ML estimate of shape parameter p given data and upper limit. %(before_notes)s mu : float distribution mean b : float distribution upper limit, defaults to sum of data - data : array_like - values used to fit distribution Notes ----- - The boundary ``p = 1`` is a special case in which the ratio between - successive terms of the distribution is 1 (i.e., the pmf is uniform). This - arises when the mean of the distribution is precisely one-half the upper + The boundary ``p = 1`` is a special case in which the ratio between + successive terms of the distribution is 1 (i.e., the pmf is uniform). This + arises when the mean of the distribution is precisely one-half the upper limit. - This distribution is known as the Pi distribution in the MaxEnt Theory of - Ecology [#]_, where the ``p`` parameter is given by ``1 - exp(-lambda)``. - The special case of a uniform pmf has been described as HEAP [#]_. + This distribution is known as the Pi distribution in the MaxEnt Theory of + Ecology [#]_, where the ``p`` parameter is equivalent to ``1 - + exp(-lambda)``. The special case of a uniform pmf has been described as + HEAP [#]_. References ---------- @@ -298,24 +284,23 @@ class geom_uptrunc_gen(rv_discrete_meco): Abundance, Distribution, and Energetics (p. 264). Oxford, United Kingdom: Oxford University Press. .. [#] - Harte, J., Conlisk, E., Ostling, A., Green, J. L., & Smith, A. B. - (2005). A theory of spatial structure in ecological communities at + Harte, J., Conlisk, E., Ostling, A., Green, J. L., & Smith, A. B. + (2005). A theory of spatial structure in ecological communities at multiple spatial scales. Ecological Monographs, 75(2), 179-197. """ - - # TODO: Should add a warning for b < 5 or 10 or so (p solver gives erratic - # answers. + # TODO: Should add a warning for b < 5 or 10 or so (p solver gives erratic + # answers. (This may or may not still be true.) @inherit_docstring_from(rv_discrete_meco) def translate_args(self, mu, b): return _geom_solve_p_from_mu_vect(mu, b), b @inherit_docstring_from(rv_discrete_meco) - def fit2(self, data, b=None): + def fit_mle(self, data, b=None): """%(super)s - Requires two arguments consisting of data to fit and ``b``, the upper - limit of the distribution (held constant). + In addition to data, requires ``b``, the upper limit of the + distribution. """ # Take mean of data as MLE of distribution mean, then calculate p mu = np.mean(data) @@ -323,7 +308,8 @@ def fit2(self, data, b=None): b = np.sum(data) p = _geom_solve_p_from_mu_vect(mu, b) - if len(np.atleast_1d(p)) == 1: # Just return float, not len 1 array + # Just return float, not len 1 array + if len(np.atleast_1d(p)) == 1: return float(p), b else: return p, b @@ -340,10 +326,10 @@ def _pmf(self, x, p, b): def _cdf(self, x, p, b): x = np.floor(x) cdf = (1.0-(1.0-p)**(x+1)) / (1.0-(1.0-p)**(b+1)) - try: - cdf[x > b] = 1 # Only valid if len(x)>1 - except: - pass + if len(np.atleast_1d(x)) > 1: + cdf[x > b] = 1 + elif x > b: + cdf = 1 return cdf def _stats(self, p, b): @@ -372,34 +358,31 @@ class nbinom_gen(spdist.nbinom_gen): r""" A negative binomial discrete random variable. 
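    A minimal sketch of the renamed interface, using the geometric model above
    (the import path is an assumption; values mirror the unit tests in this
    changeset):

        from macroeco.models import geom

        p, = geom.fit_mle([1, 2, 4, 5])   # returns a 1-tuple; p = 0.25
        geom.pmf(0, p)                    # pmf evaluated at zero
        geom.rank(10, p)                  # predicted rank abundance via ppf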
- This implementation of the geometric distribution differs from that in - `scipy.stats`, as the distribution here uses the more common ecological + This implementation of the negative binomial distribution differs from that + in `scipy.stats`, as the distribution here uses the more common ecological parameterization. .. math:: - - \mathrm{pmf(x)} = + + P(x) = \frac{\Gamma (k + x)}{\Gamma(k) x!} \left(\frac{k}{k+\mu}\right)^k \left(\frac{\mu}{k+\mu}\right)^x - for ``x >= 0``. In the traditional parameterization, ``n = k`` (the size + for ``x >= 0``. In the traditional parameterization, ``n = k`` (the size parameter) and ``p = k / (k + mu)``. The ``loc`` parameter is not used. Methods ------- - translate_args(mu) - Get shape parameter p from distribution mean - fit2(data, k_range=(0.1,100,0.1)) - ML estimate of mu and k from data, with k evaluated at (min, max, step) - values given by k_range - + translate_args(mu, k) + Not used, returns mu and k. + fit_mle(data, k_range=(0.1,100,0.1)) + ML estimate of shape parameters mu and k given data, with k evaluated + at (min, max, step) values given by k_range. %(before_notes)s mu : float distribution mean k : float clustering parameter - data : array_like - values used to fit distribution """ @@ -408,18 +391,15 @@ def translate_args(self, mu, k): return mu, k @inherit_docstring_from(rv_discrete_meco) - def fit2(self, data, k_range=(0.1,100,0.1)): + def fit_mle(self, data, k_range=(0.1,100,0.1)): """%(super)s - Requires one argument containing data to fit. A keyword argument - k_range contains a tuple of the start, stop, and step values to search - for k. Default is ``k_range=(0.1,100,0.1)``. - - This method recognizes that the MLE of the mu parameter is simply equal - to the mean of the data. A brute force search is then used to find the - parameter k. + In addition to data, gives an optional keyword argument + k_range contains a tuple of the start, stop, and step values to search + for k. Default is ``k_range=(0.1,100,0.1)``. A brute force search is + then used to find the parameter k. """ - #assert len(data)>20, "nbinom fit is not stable with <20 data points" + # TODO: Check and mention in docstring biases of MLE for k mu = np.mean(data) return mu, _nbinom_solve_k_from_mu(data, mu, k_range) @@ -471,8 +451,7 @@ def _nbinom_solve_k_from_mu(data, mu, k_range): """ For the nbinom, given mu, return k from searching some k_range. """ - - # TODO: See if a root finder like fminbound would work with Decimal used in + # TODO: See if a root finder like fminbound would work with Decimal used in # logpmf method (will this work with arrays?) def nll(data, mu, k): @@ -497,24 +476,20 @@ class expon_gen(rv_continuous_meco): An exponential continuous random variable. .. math:: - - \mathrm{pdf(x)} = \lambda e^{-\lambda x} - for ``x >= 0``. The ``loc`` and ``scale`` parameters are not used. + f(x) = \lambda e^{-\lambda x} + for ``x >= 0``. The ``loc`` and ``scale`` parameters are not used. Methods ------- translate_args(mu) - Get shape parameter lam from distribution mean - fit2(data) - ML estimate of lam from data - + Shape parameter mu given distribution mean. + fit_mle(data) + ML estimate of shape parameter lam given data. 
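    A quick numeric check of the estimator (illustrative data): the fitted
    rate is the reciprocal of the sample mean.

        lam, = expon.fit_mle([1.0, 2.0, 3.0])   # lam = 1 / 2.0 = 0.5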
%(before_notes)s mu : float distribution mean - data : array_like - values used to fit distribution """ @@ -523,9 +498,9 @@ def translate_args(self, mu): return 1 / mu @inherit_docstring_from(rv_continuous_meco) - def fit2(self, data): + def fit_mle(self, data): expon = expon_gen(a=0.0) - return 1/expon.fit(data, floc=0)[2], + return 1 / expon.fit(data, floc=0)[2], def _rvs(self, lam): return nprand.exponential(1/lam, self._size) @@ -550,25 +525,22 @@ class expon_uptrunc_gen(rv_continuous_meco): An upper-truncated exponential continuous random variable. .. math:: - - \mathrm{pdf(x)} = \frac{\lambda e^{-\lambda x}}{1 - e^{-\lambda x}} + + f(x) = \frac{\lambda e^{-\lambda x}}{1 - e^{-\lambda x}} for ``b >= x >= 0``. The ``loc`` and ``scale`` parameters are not used. Methods ------- translate_args(mu, b) - Get shape parameter lam from distribution mean and upper limit - fit2(data, b=sum(data)) - ML estimate of lam from data (returns lam, b) - + Shape parameter lam given distribution mean and upper limit. + fit_mle(data, b=sum(data)) + ML estimate of shape parameter lam given data and upper limit. %(before_notes)s mu : float distribution mean b : float distribution upper limit, defaults to sum of data - data : array_like - values used to fit distribution """ @@ -582,7 +554,11 @@ def translate_args(self, mu, b): raise NotImplementedError, "Translation of mu to lam not implemented" @inherit_docstring_from(rv_continuous_meco) - def fit2(self, data, b=None): + def fit_mle(self, data, b=None): + """%(super)s + In addition to data, requires ``b``, the upper limit of the + distribution. + """ if not b: b = np.sum(data) expon = expon_gen(a=0.0, b=b) diff --git a/macroeco/models/test_models.py b/macroeco/models/test_models.py index adcdebf..627dd0d 100644 --- a/macroeco/models/test_models.py +++ b/macroeco/models/test_models.py @@ -5,7 +5,7 @@ from __future__ import division -from numpy.testing import (TestCase, assert_equal, assert_array_equal, +from numpy.testing import (TestCase, assert_equal, assert_array_equal, assert_almost_equal, assert_array_almost_equal, assert_allclose, assert_, assert_raises) @@ -31,13 +31,13 @@ def test_mean(self): def test_cdf(self): vals = geom.cdf([0,1,2], 0.5) assert_array_almost_equal(vals, [0.5,0.75,0.875]) - + def test_translate_args(self): ps = geom.translate_args([10, 20]) assert_array_almost_equal(ps, [1/11, 1/21]) - def test_fit2(self): - p = geom.fit2([1,2,4,5]) + def test_fit_mle(self): + p = geom.fit_mle([1,2,4,5]) assert_almost_equal(p, 0.25) @@ -58,14 +58,14 @@ def test_cdf_x_len_1(self): # cdf should be not throw error even if x is len 1 vals = geom_uptrunc.cdf(0, 0.5, 2) assert_almost_equal(vals, 0.5/0.875) - + def test_mean(self): mu1 = geom_uptrunc.mean(0.801, 32) assert_almost_equal(mu1, 4, decimal=2) def test_translate_args_harte_16(self): # TODO: The Harte figures appear to be inaccurate, generate better - # canonical test case for next two tests and for test_fit2 and + # canonical test case for next two tests and for test_fit_mle and # test_mean # From Harte 2011, Oxford U Press, Tab 7.4, n0=16 row, Eq 7.50 @@ -98,11 +98,11 @@ def test_translate_args_with_sum_of_pmf(self): p2, b2 = geom_uptrunc.translate_args(120, 200) # Arbitrary assert_array_almost_equal(1,np.sum(geom_uptrunc.pmf(range(201),p2,b2))) - def test_fit2(self): - p1, _ = geom_uptrunc.fit2([0,10], 10) + def test_fit_mle(self): + p1, _ = geom_uptrunc.fit_mle([0,10], 10) assert_almost_equal(p1, 0) - p2, _ = geom_uptrunc.fit2([1,3], 16) + p2, _ = geom_uptrunc.fit_mle([1,3], 16) 
assert_almost_equal(p2, 1-0.669, decimal=2) @@ -127,24 +127,24 @@ def test_mean_var(self): def test_get_p_from_mu(self): assert_almost_equal(nbinom._get_p_from_mu(10, 2), 2/12) - def test_fit2_with_rvs(self): + def test_fit_mle_with_rvs(self): np.random.seed(8) x = nbinom.rvs(20, 10, size=100) - mu, k = nbinom.fit2(x) + mu, k = nbinom.fit_mle(x) assert_array_almost_equal([mu, k], [20, 10], decimal=0) - def test_fit2_with_R(self): + def test_fit_mle_with_R(self): #> library(MASS) #> fitdistr(seq(49), "negative binomial") x = np.array(range(1,50)) - mu, k = nbinom.fit2(x) + mu, k = nbinom.fit_mle(x) assert_array_almost_equal([mu, k], [25, 2.4337345], decimal=1) - def test_fit2_with_manual_calc(self): + def test_fit_mle_with_manual_calc(self): x = np.array([6,17,14,12,8,10,4,9,3,12,4,2,12,8,14,16,9,10,8,5,6]) - mu, k = nbinom.fit2(x, k_range=(0.01,10,0.01)) + mu, k = nbinom.fit_mle(x, k_range=(0.01,10,0.01)) assert_array_almost_equal([mu, k], [9, 8.54], decimal=2) - + class TestExpon(TestCase): pass diff --git a/macroeco/test_distributions.py b/macroeco/test_distributions.py index 63e14d8..b677262 100644 --- a/macroeco/test_distributions.py +++ b/macroeco/test_distributions.py @@ -24,7 +24,7 @@ import scipy.stats as stats import matplotlib.pyplot as plt -# TODO: Need to add fit functions to tests with new fit functions. +# TODO: Need to add fit functions to tests with new fit functions. # TODO: Do we need to test rad's? Against what? @@ -43,10 +43,10 @@ def setUp(self): 15.87, 24.32, 101.25, 155]) self.sad = np.arange(1, 156) - + def test_logser(self): # Test error raising - self.assertRaises(AssertionError, logser(n_samp=234, tot_obs=67).pmf, + self.assertRaises(AssertionError, logser(n_samp=234, tot_obs=67).pmf, 1) self.assertRaises(AssertionError, logser(n_samp=34, tot_obs=0).pmf, 1) @@ -59,7 +59,7 @@ def test_logser(self): self.assertTrue(np.round(lgser.var['p'][0], decimals=4) == 0.9974) # Test cdf reaches 1 - cdf = np.round(logser(n_samp=45, tot_obs=1200).cdf(1200)[0][0], + cdf = np.round(logser(n_samp=45, tot_obs=1200).cdf(1200)[0][0], decimals=1) self.assertTrue(cdf == 1) @@ -92,13 +92,13 @@ def test_logser_ut(self): pmf = lg.pmf(1) self.assertTrue(np.round(-np.log(lg.var['x'][0]), decimals=6) == 0.000413) lg = logser_ut(n_samp=64, tot_obs=2**12 * 64) - pmf = lg.pmf(1) + pmf = lg.pmf(1) self.assertTrue(np.round(-np.log(lg.var['x'][0]), decimals=7) == 0.0000228) - + # Check that they don't fail logser_ut(n_samp=64, tot_obs=1000).rad() logser_ut(n_samp=64, tot_obs=1000).cdf((1,1,2,4,5,7,12)) - + # Test correct answer when n_samp == tot_obs lg = logser_ut(n_samp=31, tot_obs=31) pmf = lg.pmf([1,2,3,4,5]) @@ -108,9 +108,9 @@ def test_logser_ut(self): def test_logser_ut_appx(self): # Test error raising - self.assertRaises(AssertionError, logser_ut_appx(n_samp=234, + self.assertRaises(AssertionError, logser_ut_appx(n_samp=234, tot_obs=67).pmf, 1) - self.assertRaises(AssertionError, logser_ut_appx(n_samp=34, + self.assertRaises(AssertionError, logser_ut_appx(n_samp=34, tot_obs=0).pmf, 1) # Test that values equal values from John's book (Harte 2011) @@ -140,8 +140,8 @@ def test_logser_ut_appx(self): # Test that they don't fail logser_ut_appx(n_samp=64, tot_obs=1000).rad() logser_ut_appx(n_samp=64, tot_obs=1000).cdf((1,1,2,4,5,7,12)) - - + + def test_plognorm(self): # TODO: Should test against Ethans psolver @@ -160,7 +160,7 @@ def test_plognorm(self): # Test pmf is zero when mu or sigma negative self.assertTrue(sum(np.round(plognorm(mu=-3,sigma=3).\ - pmf([1,2,3,4,5])[0], decimals=3)) == 0) + 
pmf([1,2,3,4,5])[0], decimals=3)) == 0) self.assertTrue(sum(np.round(plognorm(mu=3,sigma=-3).\ pmf([1,2,3,4,5])[0], decimals=3)) == 0) @@ -178,13 +178,13 @@ def test_plognorm(self): plognorm().fit([self.abund_list[0]]) plognorm(mu=2, sigma=2).cdf(5) - + def test_plognorm_lt(self): #Test our pmf against R's poilog R_zero_trun = [0.11620, 0.07216, 0.05201, 0.04049, 0.02783, 0.02398, 0.00686] - pred_plog = plognorm_lt(mu=2, sigma=3).pmf([1,2,3,4,6,7,23])[0] + pred_plog = plognorm_lt(mu=2, sigma=3).pmf([1,2,3,4,6,7,23])[0] self.assertTrue(np.array_equal(R_zero_trun, np.round(pred_plog, decimals=5))) @@ -211,8 +211,8 @@ def test_plognorm_lt(self): plognorm_lt(mu=2, sigma=2).pmf([2,3,4,5,23]) plognorm_lt().fit([self.abund_list[0]]) plognorm_lt(mu=10, sigma=1).cdf(45) - - + + def test_lognorm(self): # Test pmf against R output @@ -227,15 +227,15 @@ def test_lognorm(self): diff = r_output - lnorm self.assertTrue(np.all(diff == 0)) - lnorm = np.round(lognorm(tot_obs = np.exp(1.5 + (1.2**2 / 2)) * 50, + lnorm = np.round(lognorm(tot_obs = np.exp(1.5 + (1.2**2 / 2)) * 50, n_samp=50,sigma=1.2).pmf([1,2,3,4,5,6,7,12,45])[0], decimals=4) diff = r_output2 - lnorm self.assertTrue(np.all(diff == 0)) # Test cdf against R cdf - rcdf = np.array([0.3319, 0.3319, 0.4869, 0.5127, 0.6124]) - pycdf = np.round(lognorm(tot_obs=np.exp(1.5 + (3.45**2 / 2)), n_samp=1, + rcdf = np.array([0.3319, 0.3319, 0.4869, 0.5127, 0.6124]) + pycdf = np.round(lognorm(tot_obs=np.exp(1.5 + (3.45**2 / 2)), n_samp=1, sigma=3.45).cdf([1,1,4,5,12])[0], decimals=4) diff = rcdf - pycdf self.assertTrue(np.all(diff == 0)) @@ -263,7 +263,7 @@ def test_lognorm(self): pyfit2 = lognorm().fit([fit_array2]).params['sigma'][0] diff = r_lognorm_fits - np.round([pyfit1, pyfit2], decimals=5) self.assertTrue(np.all(diff == 0)) - + # Test that these don't fail lognorm().fit([self.abund_list[0]]) tot_obs=sum(self.abund_list[0]) @@ -274,9 +274,9 @@ def test_lognorm(self): dist = lognorm().fit(self.abund_list) dist.pmf(3) dist.pmf([[3],[4],[5],[6]]) - self.assertTrue(len(dist.params['tot_obs']) == 4) + self.assertTrue(len(dist.params['tot_obs']) == 4) + - def test_geo_ser(self): # TODO: Test pmf. # Visually, the CDF should be a straight line on a log(abundance) vs. @@ -305,7 +305,7 @@ def test_geo_ser(self): dist = geo_ser().fit(self.abund_list) self.assertTrue(len(dist.params['k']) == 4) - + def test_broken_stick(self): # Test that n_except throws approriate error if length n_samp and tot_obs are not # the same as length pmf @@ -326,7 +326,7 @@ def test_broken_stick(self): diff = np.array(expt) - bs self.assertTrue(np.all(diff == 0)) - # Test that these don't fail + # Test that these don't fail broken_stick(n_samp=23, tot_obs=500).cdf([1,2,500]) broken_stick(n_samp=23, tot_obs=500).rad() @@ -338,14 +338,14 @@ def test_broken_stick(self): ab in self.abund_list]))) def test_dgamma(self): - + # Don't have any good published graphs to test it against. Test # everything is working obs_sad = [103,115,13,2,67,36,51,8,6,61,10,21,7,65,4,49,92,37,16,6,23,\ 9,2,6,5,4,1,3,1,9,2] dg = dgamma().fit([obs_sad]) - + # Check that the parameters are in vars self.assertTrue('alpha' in dg.var) self.assertTrue('theta' in dg.var) @@ -372,10 +372,10 @@ def test_sugihara(self): self.assertRaises(NotImplementedError, sugihara().cdf, 34) self.assertRaises(NotImplementedError, sugihara().pdf, 23) - + def test_binm(self): # Using scipy.binom which is already unit tested. 
- + # Check that pdf and cdf give correct answers dist = binm(tot_obs=8123, n_samp=10) self.assertTrue(dist.cdf(8123)[0][0] == 1) @@ -388,7 +388,7 @@ def test_binm(self): # Check that fit works dist = binm().fit(self.abund_list) - + def test_pois(self): # Using scipy.poisson which is already unit tested @@ -421,7 +421,7 @@ def test_nbd(self): geo_data = np.random.geometric(p, size=10000) dist = nbd().fit([geo_data]) self.assertTrue(np.round(dist.params['k'][0], decimals=1) == 1) - + def test_nbd_lt(self): # TODO: test pmf @@ -464,7 +464,7 @@ def test_fnbd(self): # Test that no error is thrown if a zero is passed fnbd().fit([[0,1,2,3,4,5,6]]) - + # TypeError if k is not given dist = fnbd(tot_obs=2300, n_samp=20) self.assertRaises(TypeError, dist.pmf, 45) @@ -521,7 +521,7 @@ def test_fnbd(self): plt.clf() # Based on Zillio and He 2010, Calculating a few pmf values by hand. - # Going to test the fnbd against these values. + # Going to test the fnbd against these values. def test_geo(self): # This is just a wrapper function for nbd. Already tested. Will just @@ -531,7 +531,7 @@ def test_geo(self): test = geo().fit([[0,0,0,1,4,67], [1,1,3,5,23]]) self.assertTrue(np.all(test.params['tot_obs'] == np.array([72, 33]))) self.assertTrue(np.all(test.params['n_samp'] == np.array([6,5]))) - + # Test that tot_obs is broadcast test = geo(tot_obs=456, n_samp = [34,56,12]) test.pmf(0) @@ -551,12 +551,12 @@ def test_geo(self): self.assertTrue(np.array_equal(test_geo[i], test_nbd[i])) def test_fgeo(self): - + # Test fit work and returns expected results test = fgeo().fit([[0,0,0,1,4,67], [1,1,3,5,23]]) self.assertTrue(np.all(test.params['tot_obs'] == np.array([72, 33]))) self.assertTrue(np.all(test.params['n_samp'] == np.array([6,5]))) - + # Test that tot_obs is broadcast test = fgeo(tot_obs=456, n_samp = [34,56,12]) test.pmf(0) @@ -604,25 +604,25 @@ def test_tgeo(self): # Test tgeo cdf is one dist = tgeo(n_samp=10, tot_obs=2345) self.assertTrue(np.round(dist.cdf(2345)[0][0], decimals=1) == 1.0) - + # When n_samp < 2 weird things happen # Testing Lagrange multiplier against values generated by hand # [(n=60, a=.1), (n=340, a=.6), (n=34, a=.9), (n=12, a=.9), (n=2, .9), # (n=1, a=.1),(n=1, a=0.0001), x_vals = np.array([.8572, 1.0036, 1.2937, 1.8298, 5.6056, 0.1111]) - tg = tgeo(tot_obs=[60,340,34,12, 2, 1], + tg = tgeo(tot_obs=[60,340,34,12, 2, 1], n_samp=(1./.1, 1/.6, 1/.9, 1/.9, 1/.9, 1/.1)) tg.pmf(0) pred_vals = np.round(tg.var['x'], decimals=4) self.assertTrue(np.array_equal(x_vals, pred_vals)) - + x_vals = np.array([1.0e-4, 1.0e-5]) tg = tgeo(tot_obs=[1,1], n_samp=[1/.0001, 1/.00001]) tg.pmf(0) pred_vals = np.round(tg.var['x'], decimals=6) self.assertTrue(np.array_equal(x_vals, pred_vals)) - - # Optimizer is starting to round. Tried brentq, bisect and fsolve + + # Optimizer is starting to round. Tried brentq, bisect and fsolve x_vals = np.array([9, 11]) tg = tgeo(tot_obs=[1,10], n_samp=[1/.9, 1/.99]) tg.pmf(0) @@ -640,17 +640,17 @@ def test_tgeo(self): # Test that pdf and cdf give correct values check = dist.pmf([1,1,2,3,4,5,12,34,65]) self.assertTrue(dist.cdf(0)[0][0] == dist.pmf(0)[0][0]) - self.assertTrue(dist.cdf(23)[0][0] == + self.assertTrue(dist.cdf(23)[0][0] == np.sum(dist.pmf(np.arange(0,24))[0])) # Test that fit provides the correct number of tot_obs. Already have # tested generic fit method. 
dist = tgeo().fit(self.abund_list) self.assertTrue(len(dist.params['tot_obs']) == 4) - - + + def test_mete_sar_iter(self): - + # Check mete sar against EW values EWsar_down = np.array([8.79, 12.37, 16.71, 21.81, 27.59, 34]) #S = 23, N=3400, anchor_area=123, target_area=2000) @@ -675,21 +675,21 @@ def test_mete_sar_iter(self): , downscale=6) self.assertTrue(len(sar) == 11) - # Check that only halving or doubling results are returned when + # Check that only halving or doubling results are returned when # non_iter=True sar = mete_sar_iter(n_samp=34, tot_obs=1000).iter_vals([1,2,.5,.25,5,.4], non_iter=True) - self.assertTrue(len(sar) == 4) + self.assertTrue(len(sar) == 4) # Check errors are thrown sar = mete_sar_iter(n_samp=34, tot_obs=1000) - # Check that fit method fits correctly with two arguments passed + # Check that fit method fits correctly with two arguments passed sar = mete_sar_iter().fit(self.sad, self.sar) self.assertTrue(sar.params['n_samp'] == 155) self.assertTrue(sar.params['tot_obs'] == sum(np.arange(1, 156))) - # Check that fit method fits correctly with one argument passed + # Check that fit method fits correctly with one argument passed sar = mete_sar_iter().fit(self.sad) self.assertTrue(sar.params['n_samp'] == 155) self.assertTrue(sar.params['tot_obs'] == sum(np.arange(1, 156))) @@ -726,7 +726,7 @@ def test_power_law(self): sar = powerlaw().fit(self.sad, self.sar) g = sar.vals([1]) self.assertTrue(np.round(g['items'][0], decimals=0) == 200) - + # Check that c and z exist and check values of other parameters. sar.params['c']; sar.params['z'] self.assertTrue(sar.params['n_samp'] == 155) @@ -745,10 +745,10 @@ def test_power_law(self): self.assertTrue(not(np.array_equal(res1['x_over_y'], res2['x_over_y']))) self.assertTrue((np.array_equal(res1['z'], res2['z']))) - + def test_gen_sar(self): '''Testing that this actually works''' - + # Testing that gen_sar actually runs. Not sure what values to test it # against. @@ -768,7 +768,7 @@ def test_gen_sar(self): base2 = gnsar.iter_vals([1,2,.8,.2,.3], base=2) base3 = gnsar.iter_vals([1,2,.8,.2,.3], base=3) self.assertTrue(not(np.array_equal(base2['area'], base3['area']))) - + # Test that non_iter=False, returns only areas that match a_list a_list1 = [1,2,.5,.25,.1] @@ -787,7 +787,7 @@ def test_gen_sar(self): # Non_iter should be overridden sar_arr = gnsar.iter_vals(downscale=1, upscale=1, non_iter=True) self.assertTrue(len(sar_arr) == 3) - + # Test vals and fit performs properly. Test that fit ignores all args # but the first one too. 
@@ -860,7 +860,7 @@ def test_psi(self): ps.rad() def test_nu(self): - + # Test error is raised when pdf called self.assertRaises(NotImplementedError, nu(n_samp=30, tot_obs=400, E=5000).pdf, 0) @@ -875,7 +875,7 @@ def test_nu(self): # Value with no support should equal 0 self.assertTrue(nudist.pmf(1)[0][0] == 0) self.assertTrue(nudist.cdf(1)[0][0] == 0) - + #Check that the last value in cdf is 1 self.assertTrue(np.round(nudist.cdf(E)[0][0], decimals=1) == 1) @@ -894,12 +894,11 @@ def test_nu(self): self.assertTrue(g.params['tot_obs'][0] == 28) self.assertTrue(g.params['n_samp'][0] == 7) self.assertTrue(g.params['E'][0] == 28) - + if __name__ == '__main__': unittest.main() - @@ -907,4 +906,5 @@ def test_nu(self): - + + From 6c17b11821cdff27c2d55b7f2b17a52233631d7f Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 25 Mar 2014 15:35:41 -0700 Subject: [PATCH 136/343] Remove output, no longer needed --- macroeco/output.py | 1194 -------------------------------------------- 1 file changed, 1194 deletions(-) delete mode 100644 macroeco/output.py diff --git a/macroeco/output.py b/macroeco/output.py deleted file mode 100644 index f5c3e22..0000000 --- a/macroeco/output.py +++ /dev/null @@ -1,1194 +0,0 @@ -#!/usr/bin/python - -'''This module provides functions for outputting results of macroeco -analyses''' - - -from __future__ import division -import matplotlib.pyplot as plt -import numpy as np -import logging -from utils.form_func import output_form, add_field -import copy as cp -import os -import shutil - - -readme_info_plots =\ -''' -FOLDER DESCRIPTION -------------------- - -The folder {3} contains {0} files. There are {1} {4} represented as png -files and {2} csv files which contain the data required to generate each -plot. The csv files have identical names to the png files to which they -correspond. Each file name is a concatenation of the following strings: -analysis name, run name, data name, and {5}. An additional identifier is -appended to the file name after {5} in order to make each file unique. It is -either a species identifier or a number. - -On the right hand side of each plot, you will see a string that begins -'Criteria for plot'. The criteria are either a species name or string that -looks like - -'y': [('>=', 0.0), ('<', 150.0)], 'x': [('>=', 0.0), ('<', 50.0)] - -This can be interpreted as follows. The plot under consideration has 'y' values -greater than or equal to 0 and less than 150 and 'x' values greater than or -equal to 0 and less than 50. Similarly a criteria string of the form - -'year' : ('==' , 1998) - -can be interpreted as the plot under consideration has 'year' values equal to -1998. The criteria is determined by how you decided to divide your plot for the -analysis. A criteria string of the form - -'temperature' : ('==', 'cool') - -can be interpreted as the plot under consideration has 'temperature' values -equal to 'cool'. ''' - -readme_info_summary=\ -u""" -FOLDER DESCRIPTION ------------------- - -The folder {0} contains {1} txt file(s) and {1} csv file(s). Each .txt file -contains a summary for the plot generate by the criteria at the header of the -file. The criteria are either a species name or string that looks like - -'y': [('>=', 0.0), ('<', 150.0)], 'x': [('>=', 0.0), ('<', 50.0)] - -This can be interpreted as follows. The plot under consideration has 'y' values -greater than or equal to 0 and less than 150 and 'x' values greater than or -equal to 0 and less than 50. 
Similarly a criteria string of the form - -'year' : ('==' , 1998) - -can be interpreted as the plot under consideration has 'year' values equal to -1998. - -Each txt file has a corresponding csv plot with the AIC values in tabular form -for easy analysis. - -Each summary file contains summary statistics for the observed data and each -distribution to which the observed data was compared. Each file name is a -concatenation of the following strings: analysis name, data name and -summary_table or AIC_table. An additional identifier is appended to the file -name after summary_table in order to make each file unique. It is either a -species identifier, a number, or both.""" - -readme_info_rarity =\ -''' -FOLDER DESCRIPTION ------------------- - -The folder {0} contains {1} csv files. Each file contains the -columns 'data_name', 'criteria', 'observed', and any number of columns with -distribution names. These are the distributions to which the data was -compared. The column data_name gives the name of the data being examined, the -column criteria describes the specifications that made the given plot, the -remaining columns describe the number of items that had a value below a -prespecified minimum. The prespecified minimum can be found in the file name -immediately after '_<=_'. Each file name is a concatenation of the following -strings: analysis name, data name and 'rarity_<=_' some minimum. -''' - -readme_info_sar=\ -''' -FOLDER DESCRIPTION ------------------- - -The folder {0} contains {1} png files and {2} csv files. The png file(s) are -log-log SAR-EAR plot(s) with area_fraction on the x-axis and species on the y-axis. -The names of the png file(s) are a concatenation of the following strings: -analysis name, run_name, data_name, and SAR-EAR_plot. A number is appended to the -end of the plot to ensure the filename is unique. The csv -files contain the data required to make the given plot(s). Each csv file -contains two columns, species and area_fraction. area_fraction assigns the -base area a value of 1 and represents all other areas as a fraction of the base -area. The csv file name(s) are a concatenation of the following strings: -analysis_name, run_name, data_name, SAR-EAR_plot_, a unique number, and the SAR-EAR -name. - -''' - - -class DistributionOutput(object): - ''' - This formats and outputs analyses on distributions - - ''' - - def __init__(self, out_dir): - ''' - Parameters - ---------- - out_dir : string - String appended to output directory - ''' - - self.out_dir = out_dir - self.urns = 'Urns' - self.balls = 'Balls' - self.Nmax = 'Nmax' - self.rad_x_axis = 'Rank' - self.rad_y_axis = 'Abundance' - self.cdf_x_axis = 'Abundance' - self.cdf_y_axis = 'Cumulative Probability' - self.variable = 'abundance' - self.dist_name = '' - - - def write_summary_table(self, smry, criteria=None, species=None): - ''' - Parameters - --------- - smry : dict - A dictionary as returned by the function compare_summary within the - CompareDistribution class. - criteria : array-like object - An array-like object in which contains either string or dicts that - tell how each dataset was generated. Describes the subsetting of - an sad and the species ID of an ssad. - species : array_like object - If not None, must be the an array-like object of the same length as - criteria, but containing species strings. Can only be used if - criteria is also not None. 
- - Notes - ----- - Writes out a formatted txt file - - ''' - # Make output folder - folder_name = self.dist_name + '_summary_statistics_' + self.out_dir - make_directory(folder_name) - - tot_sad = len(smry['observed']['balls']) - if criteria != None: - assert len(criteria) == tot_sad, "len(criteria) must equal" + \ - " number of data arrays under consideration" - if species != None: - assert len(species) == tot_sad, "len(species) must equal" + \ - " number of data arrays under consideration" - ob = smry['observed'] - - count = 0 - for i in xrange(tot_sad): - if criteria != None and species != None: - filename = os.path.join(folder_name, self.out_dir + \ - '_summary_table_' + str(species[i]) + '_' + str(i) + - '.txt') - filename_aic = os.path.join(folder_name, self.out_dir + \ - '_AIC_table_' + str(species[i]) + '_' + str(i)) - - elif criteria != None and np.all([type(crt) != dict for crt in - criteria]): - filename = os.path.join(folder_name, self.out_dir + \ - '_summary_table_' + str(criteria[i]) + '.txt') - filename_aic = os.path.join(folder_name, self.out_dir + \ - '_AIC_table_' + str(criteria[i])) - - - else: - filename = os.path.join(folder_name, self.out_dir + - '_summary_table_' + str(i) + '.txt') - filename_aic = os.path.join(folder_name, self.out_dir + - '_AIC_table_' + str(i)) - - - fout = open(filename, 'w') - logging.info('Writing summary table %s' % filename) - - - if criteria != None and species != None: - - fout.write('CRITERIA: ' + str(criteria[i]) + '\n' + - 'SPECIES: ' + str(species[i]) + '\n\n') - - elif criteria != None: - fout.write('CRITERIA: ' + str(criteria[i]) + '\n\n') - - else: - fout.write('CRITERIA: NONE ' + str(i) + '\n\n') - - # Getting rarity - ob_rare = {} - for mins in ob['tot_min'].iterkeys(): - ob_rare['<=' + str(mins)] = ob['tot_min'][mins][i] - - fout.write('EMPIRICAL VALUES:\n' + self.urns + ' = ' + - str(ob['urns'][i]) + '\n' + self.balls + ' = ' + - str(ob['balls'][i]) + '\nObserved ' + self.Nmax + ' = ' + - str(ob['max'][i]) + '\nObserved Rarity = ' + - str(ob_rare) + '\n\n') - - - # Also output AIC values in for each table. Could add other - # measures to this table as well. 
- # Might break this out later - aic_vals = {} - - for kw in smry.iterkeys(): - if kw != 'observed': - dt= smry[kw] - # set relevant aic values for table output - aic_vals[kw]={'AIC_weights' : dt['aic_w'][i], 'Delta_AIC' : - dt['aic_d'][i], 'Parameter_number' : - dt['par_num'][i], 'Corrected_AIC' : - dt['aic'][i]} - # Getting rarity - dt_rare = {} - for mins in dt['tot_min'].iterkeys(): - dt_rare['<=' + str(mins)] = dt['tot_min'][mins][i] - dt_vars = {} - for key in dt['vars'].iterkeys(): - if len(dt['vars'][key]) != 0: - dt_vars[key] = dt['vars'][key][i] - - fout.write('PREDICTED DISTRIBUTION : ' + kw + '\n' + - self.urns + ' = ' + str(dt['urns'][i]) + '\n' + - self.balls + ' = ' + str(dt['balls'][i]) + - '\nAIC = ' + str(dt['aic'][i]) + '\nDelta_AIC = ' + - str(dt['aic_d'][i]) + '\nAIC_weight = ' + - str(dt['aic_w'][i]) + '\nNumber of Parameters = ' + - str(dt['par_num'][i]) + '\nPredicted '+ self.Nmax + ' = ' + - str(dt['max'][i]) + '\nPredicted Rarity = ' + - str(dt_rare) + '\nOther Variables = ' + - str(dt_vars) + '\n\n') - fout.close() - count += 1 - - # Make and print AIC table - dtype = [('Model', 'S30'), ('Parameter_number', np.float), - ('Corrected_AIC', np.float), ('AIC_weights', np.float), - ('Delta_AIC', np.float)] - aic_array = np.empty(len(aic_vals), dtype=dtype) - for j, model_name in enumerate(aic_vals.iterkeys()): - aic_array['Model'][j] = model_name - aic_array['Parameter_number'][j] =\ - aic_vals[model_name]['Parameter_number'] - aic_array['Corrected_AIC'][j] =\ - aic_vals[model_name]['Corrected_AIC'] - aic_array['AIC_weights'][j] =\ - aic_vals[model_name]['AIC_weights'] - aic_array['Delta_AIC'][j] =\ - aic_vals[model_name]['Delta_AIC'] - output_form(aic_array, filename_aic) - - fout = open(os.path.join(folder_name, 'README'), 'w') - fout.write(readme_info_summary.format(folder_name, count)) - fout.close() - - - - def plot_rads(self, rads, criteria=None, species=None): - ''' - Plotting the observed and predicted rank abundance distributions - - Parameters - ---------- - rads : dict - A dictionary that is returned from the function compare_rads in the - CompareDistribution class. - - criteria : list of objects - If not none, the objects in criteria will be printed as strings in - the plots and/or file names. They will only be included in the - file name if they are strings. - - species : list - A list of species names to be included in the csv file. Must - contain the same number of iterables - - Notes - ----- - Saves RAD plots to given out_dir. Saves as many plots as there are - observed distributions. 
- - ''' - folder_name = 'rank_abundance_plots_' + self.out_dir - make_directory(folder_name) - - tot_sad = len(rads['observed']) - recs = make_rec_from_dict(rads, tot_sad, species=species) - - if criteria != None: - assert len(criteria) == tot_sad, "len(criteria) must equal" + \ - " number of data arrays under consideration" - count = 0 - for i, data in enumerate(recs): - - # Plot all columns of the rec array - plot_rec_columns(data) - plt.semilogy() - plt.ylabel('Log ' + self.rad_y_axis) - plt.xlabel(self.rad_x_axis) - - if criteria != None and np.all([type(crt) != dict for crt in - criteria]): - plt.title('Rank abundance distribution for ' + str(criteria[i])) - filename = os.path.join(folder_name, self.out_dir + - '_rank_abundance_plot_' + str(criteria[i])) - - logging.info('Saving figure and csv ' + filename) - plt.savefig(filename) - output_form(recs[i], filename) - count += 2 - - elif criteria != None and np.all([type(crt) == dict for crt in - criteria]): - plt.title('Rank abundance distribution') - plt.figtext(.97, .5, 'Criteria for plot: ' + str(criteria[i]), - rotation='vertical', size=8, - horizontalalignment='center', - verticalalignment='center') - - filename = os.path.join(folder_name, self.out_dir + - '_rank_abundance_plot_' + str(i)) - logging.info('Saving figure ' + filename) - plt.savefig(filename) - output_form(recs[i], filename) - count += 2 - - else: - plt.title('Rank abundance distribution: plot number ' + str(i)) - - filename = os.path.join(folder_name, self.out_dir + - '_rank_abundance_plot_' + str(i)) - logging.info('Saving figure ' + filename) - plt.savefig(filename) - output_form(recs[i], filename) - count += 2 - - plt.clf() - - fout = open(os.path.join(folder_name, 'README'), 'w') - fout.write(readme_info_plots.format(count, count /2, count/2, - folder_name, 'rank abundance plots (RAD)', 'rank_abundance_plot')) - fout.close() - - - def plot_cdfs(self, cdfs, obs_data, criteria=None, species=None): - - ''' - - Plots observed vs predicted cdfs and returns a csv file with values - used for plotting. - - - Parameters - ---------- - cdfs : dict - A dictionary that is returned from the function compare_cdfs in the - CompareDistribution class. - - obs_data : list - A list of arrays. The observed data - (CompareDistribution.observed_data) - - criteria : dict or None - The criteria for splitting the data. Can be species names. If not - None, the criteria will be printed on the plots - - species : array-like object or None - The species names that will be added to the csv files. 
- - ''' - # Make directory - folder_name = self.dist_name + '_cdf_plots_' + self.out_dir - make_directory(folder_name) - - # SEDOutput could pass in tuple - spp = None - if type(cdfs) == type((1,)) and len(cdfs) == 2: - spp = cdfs[1] - cdfs = cdfs[0] - - tot_sad = len(cdfs['observed']) - recs = make_rec_from_dict(cdfs, tot_sad, add_rank=False) - if criteria != None: - assert len(criteria) == tot_sad, "len(criteria) must equal" + \ - " number of data arrays under consideration" - - count = 0 - for i, data in enumerate(recs): - - names = data.dtype.names - for nm in names: - fig = plt.plot(np.sort(obs_data[i]), np.sort(data[nm]), '-o') - - # Formatting - fig[0].axes.xaxis.tick_bottom() - fig[0].axes.yaxis.tick_left() - ylim = list(plt.ylim()) - if ylim[0] == 0: - ylim[0] = -.1 - plt.ylim((ylim[0], 1.1)) - xlim = plt.xlim() - plt.xlim((.9, xlim[1] + 10)) - plt.legend(names, loc='best') - plt.semilogx() - plt.ylabel(self.cdf_y_axis) - plt.xlabel('Log ' + self.cdf_x_axis) - - # Add observed to cdf array - if species != None: - sorted_ab, sorted_spp = sort_rank_abund([obs_data[i]], - [species[i]]) - n_rec = add_field(data, [(self.variable, np.float)]) - n_rec = add_field(n_rec, [('species', 'S40')]) - n_rec[self.variable] = sorted_ab[0] - n_rec['species'] = sorted_spp[0] - else: - n_rec = add_field(data, [(self.variable, np.float)]) - n_rec[self.variable] = np.sort(obs_data[i]) - - # Used for SSAD - if criteria != None and spp == None and np.all([type(crt) != dict - for crt in criteria]): - - plt.title('Cumulative density function for species ' + str(criteria[i])) - - filename = os.path.join(folder_name, self.out_dir + - '_cdf_plot_' + str(criteria[i])) - logging.info('Saving figure and csv ' + filename) - plt.savefig(filename) - output_form(n_rec, filename) - count += 2 - - # Used for SAD - elif criteria != None and spp == None and np.all([type(crt) == dict - for crt in criteria]): - plt.title('Cumulative Density Function') - plt.figtext(.97, .5, 'Criteria for plot: ' + str(criteria[i]), - rotation='vertical', size=8, - horizontalalignment='center', - verticalalignment='center') - - filename = os.path.join(folder_name, self.out_dir + - '_cdf_plot_' + str(i)) - logging.info('Saving figure ' + filename) - plt.savefig(filename) - output_form(n_rec, filename) - count += 2 - - # Used for SED - elif criteria != None and spp != None and np.all([type(crt) == dict - for crt in criteria]): - - plt.title('Cumulative Density Function for species ' + - str(spp[i])) - - plt.figtext(.97, .5, 'Criteria for plot: ' + str(criteria[i]), - rotation='vertical', size=8, - horizontalalignment='center', - verticalalignment='center') - - filename = os.path.join(folder_name, self.out_dir + - '_cdf_plot_' + str(spp[i]) + '_' + str(i)) - logging.info('Saving figure ' + filename) - plt.savefig(filename) - output_form(n_rec, filename) - count += 2 - - - else: - plt.title('CDF: plot number ' + str(i)) - filename = os.path.join(folder_name, self.out_dir + - '_cdf_plot_' + str(i)) - logging.info('Saving figure and csv ' + filename) - plt.savefig(filename) - output_form(n_rec, filename) - count += 2 - - plt.clf() - - fout = open(os.path.join(folder_name, 'README'), 'w') - fout.write(readme_info_plots.format(count, count/2, count/2, - folder_name, 'cumulative density plots (cdf)', 'cdf_plot')) - fout.close() - -class SADOutput(DistributionOutput): - ''' - Derived class for SAD output - ''' - - def __init__(self, out_dir): - ''' - Parameters - ---------- - out_dir : string - String appended to output directory - - ''' - 
self.out_dir = out_dir - self.urns = 'Species' - self.balls = 'Total Individuals' - self.Nmax = 'Nmax' - self.rad_x_axis = 'Rank' - self.rad_y_axis = 'Abundance' - self.cdf_x_axis = 'Abundance' - self.cdf_y_axis = 'Cumulative Probability' - self.variable = 'abundance' - self.dist_name = 'sad' - -class SSADOutput(DistributionOutput): - ''' - Derived class for SSAD output - ''' - - def __init__(self, out_dir): - ''' - Parameters - ---------- - out_dir : string - String appended to output directory - - ''' - self.out_dir = out_dir - self.urns = 'Cells' - self.balls = 'Individuals' - self.Nmax = 'Nmax' - self.rad_x_axis = 'Rank' - self.rad_y_axis = 'Abundance' - self.cdf_x_axis = 'Abundance' - self.cdf_y_axis = 'Cumulative Probability' - self.variable = 'abundance' - self.dist_name = 'ssad' - -class SAROutput(object): - ''' - This object interacts with CompareSARCurves - ''' - - def __init__(self, out_dir): - ''' - Parameters - ---------- - out_dir : string - String appended to output directory - ''' - self.out_dir = out_dir - - def plot_sars(self, sars, names=[], form='sar'): - ''' - Plots observed vs predicted sars - - Parameters - ---------- - sars : list of dicts - The output of CompareSARCurve method compare_curves - - names : list or strings - If not None, names is a list of the same length as sars. Gives the - desired names for the plots. - - ''' - - if form == 'sar': - file_str = '_SAR_plot_' - ylab = 'log(Species Number)' - stype = 'species' - folder_name = 'sar_plots_' + self.out_dir - make_directory(folder_name) - elif form == 'ear': - file_str = '_EAR_plot_' - ylab = 'log(Endemic Species Number)' - stype = 'endemic_species' - folder_name = 'ear_plots_' + self.out_dir - make_directory(folder_name) - else: - raise ValueError("Parameter 'form' must be 'ear' or 'sar' not '%s'" - % form) - - if len(names) != 0: - assert len(names) == len(sars); "Length of names must equal" + \ - "length of sars" - count = 0 - for i, sar in enumerate(sars): - filename = os.path.join(folder_name, self.out_dir + file_str + - str(i)) - legend = [] - for kw in sar.iterkeys(): - legend.append(kw) - if kw == 'observed': - fig = plt.plot(sar[kw]['area'], sar[kw]['items'], '-o') - else: - fig = plt.plot(sar[kw]['area'], sar[kw]['items']) - - # Change dtype names and output - defnm = sar[kw].dtype.names - sar[kw].dtype.names = (stype, 'area_fraction') - output_form(sar[kw], filename + '_' + kw) - sar[kw].dtype.names = defnm - - # Plot formatting - fig[0].axes.xaxis.tick_bottom() - fig[0].axes.yaxis.tick_left() - - plt.loglog() - plt.legend(tuple(legend), loc='best') - plt.xlabel('log(Area Fraction)') - plt.ylabel(ylab) - if len(names) != 0: - plt.title(names[i]) - else: - plt.title(form.upper() + ' plot %i' % (i)) - filename = os.path.join(folder_name, self.out_dir + file_str + - str(i)) - logging.info('Saving figure ' + filename) - plt.savefig(filename) - plt.clf() - count += 1 - - fout = open(os.path.join(folder_name, 'README'), 'w') - fout.write(readme_info_sar.format(folder_name, count, count * len(sar))) - fout.close() - - -class ASEDOutput(DistributionOutput): - ''' - Class outputs the average species energy distributions by interacting with - CompareASED - ''' - - def __init__(self, out_dir): - ''' - Parameters - ---------- - out_dir : string - String appended to output directory - ''' - self.out_dir = out_dir - self.urns = 'Species' - self.balls = 'Sum of Species Average Energies' - self.Nmax = 'Max Average Energy' - self.cdf_x_axis = 'Average Energy' - self.cdf_y_axis = 'Cumulative Probability' - 
self.variable = 'average energy' - self.dist_name = 'ased' - - def plot_rads(self, *args, **kwargs): - ''' - Not implemented for this class object - ''' - - raise NotImplementedError('plot_rads is not implemented for object %s' - % (self.__class__.__name__)) - - def plot_reds(self, reds, criteria=None, species=None): - ''' - Plotting the observed and predicted rank abundance distributions - - Parameters - ---------- - reds : dict - A dictionary that is returned from the function compare_reds in the - CompareASED class. - - criteria : list of objects - If not none, the objects in criteria will be printed a strings in - the plots and file names. - - Notes - ----- - Saves RAD plots to given out_dir. Saves as many plots as there are - observed distributions. - - ''' - folder_name = 'ased_rank_energy_plots_' + self.out_dir - make_directory(folder_name) - - tot_sad = len(reds['observed']) - recs = make_rec_from_dict(reds, tot_sad, species=species) - - if criteria != None: - assert len(criteria) == tot_sad, "len(criteria) must equal" + \ - " number of data arrays under consideration" - count = 0 - for i, data in enumerate(recs): - - # Plot all columns of the rec array - plot_rec_columns(data) - plt.semilogy() - plt.ylabel('Log Energy') - plt.xlabel('Rank') - - if criteria != None and np.all([type(crt) != dict for crt in - criteria]): - plt.title('ASED rank energy distribution for ' + - str(criteria[i])) - filename = os.path.join(folder_name, self.out_dir + - '_rank_abundance_plot_' + str(criteria[i])) - - logging.info('Saving figure and csv ' + filename) - plt.savefig(filename) - output_form(recs[i], filename) - count += 2 - - elif criteria != None and np.all([type(crt) == dict for crt in - criteria]): - plt.title('ASED rank energy distribution') - plt.figtext(.97, .5, 'Criteria for plot: ' + str(criteria[i]), - rotation='vertical', size=8, - horizontalalignment='center', - verticalalignment='center') - - filename = os.path.join(folder_name, self.out_dir + - '_ased_rank_energy_plot_' + str(i)) - logging.info('Saving figure ' + filename) - plt.savefig(filename) - output_form(recs[i], filename) - count += 2 - - else: - plt.title('ASED rank energy distribution: plot number ' + str(i)) - - filename = os.path.join(folder_name, self.out_dir + - '_ased_rank_energy_plot_' + str(i)) - logging.info('Saving figure ' + filename) - plt.savefig(filename) - output_form(recs[i], filename) - count += 2 - - plt.clf() - - fout = open(os.path.join(folder_name, 'README'), 'w') - fout.write(readme_info_plots.format(count, count /2, count/2, - folder_name, - 'average species energy distribution (ASED) rank' + - ' energy plots', 'ased_rank_energy_plot')) - fout.close() - - - -class IEDOutput(DistributionOutput): - ''' - Class outputs individual energy distributions by interacting with - CompareIED - - ''' - - def __init__(self, out_dir): - ''' - Parameters - ---------- - out_dir : string - String appended to output directory - ''' - self.out_dir = out_dir - self.urns = 'Individuals' - self.balls = 'Energy' - self.Nmax = 'Max Energy' - self.cdf_x_axis = 'Energy' - self.cdf_y_axis = 'Cumulative Probability' - self.variable = 'energy' - self.dist_name = 'ied' - - def plot_rads(self, *args, **kwargs): - ''' - Not implemented for this class object - ''' - - raise NotImplementedError('plot_rads is not implemented for object %s' - % (self.__class__.__name__)) - - - def plot_reds(self, reds, criteria=None): - ''' - Saves plot and csv file with predicted and empirical rank energy data - - Parameters - ---------- - reds : tuple - 
The output from the CompareIED.compare_rads method - criteria : list or None - A list of dicts with the criteria for divisions. See Patch.sad - - Output - ------ - This method outputs both a plot and a csv that compare observed and - predicted individual rank energy curves for the entire community at the - given subset. - - ''' - folder_name = 'ied_rank_energy_plots_' + self.out_dir - make_directory(folder_name) - - - tot_reds = len(reds['observed']) - recs = make_rec_from_dict(reds, tot_reds) - if criteria != None: - assert len(criteria) == tot_reds, "len(criteria) must equal" + \ - " number of reds under consideration" - count = 0 - for i, data in enumerate(recs): - - #Plot all data in a single rec array - plot_rec_columns(data) - - # Make appropriate title for figure - if criteria != None: - plt.title('Rank Energy Distribution') - plt.figtext(.97, .5, 'Criteria for plot: ' + str(criteria[i]), - rotation='vertical', size=8, - horizontalalignment='center', - verticalalignment='center') - else: - plt.title('Rank Energy Distribution') - plt.figtext(.97, .5, 'Plot number: ' + str(i), - rotation='vertical', size=8, - horizontalalignment='center', - verticalalignment='center') - - plt.loglog() - plt.ylabel('Log Energy') - plt.xlabel('Log Rank') - - filename = os.path.join(folder_name, self.out_dir + - '_ied_rank_energy_' + str(i)) - - logging.info('Saving figure ' + filename) - plt.savefig(filename) - plt.clf() - output_form(recs[i], filename) - count += 2 - - fout = open(os.path.join(folder_name, 'README'), 'w') - fout.write(readme_info_plots.format(count, count/2, count/2, - folder_name, - 'individual energy distribution (IED) rank energy plots', - 'ied_rank_energy')) - fout.close() - -class SEDOutput(DistributionOutput): - ''' - Class outputs species-level energy distributions by interacting with - CompareSED - - ''' - - def __init__(self, out_dir): - ''' - Parameters - ---------- - out_dir : string - String appended to output directory - ''' - self.out_dir = out_dir - self.urns = 'Individuals in Species' - self.balls = 'Energy' - self.Nmax = 'Max Energy' - self.cdf_x_axis = 'Energy' - self.cdf_y_axis = 'Cumulative Probability' - self.variable = 'energy' - self.dist_name = 'sed' - - def plot_rads(self, *args, **kwargs): - ''' - Not implemented for this class object - ''' - - raise NotImplementedError('plot_rads is not implemented for object %s' - % (self.__class__.__name__)) - - - def plot_reds(self, reds, criteria=None): - ''' - Saves plot and csv file with predicted and empirical rank energy data - - Parameters - ---------- - reds : tuple - The output from the CompareSED.compare_rads method with - return_spp=True. - criteria : list or None - A list of dicts with the criteria for divisions. See Patch.sad - - Output - ------ - This method outputs both a plot and a csv that compare observed and - predicted species-level rank energy curves. - - ''' - folder_name = 'sed_rank_energy_plots_' + self.out_dir - make_directory(folder_name) - - if type(reds) != type((1,)): - raise TypeError("Input reds must be a tuple. 
Set return_spp=True" + - " in CompareSED.compare_rads") - spp = reds[1] - tot_reds = len(reds[0]['observed']) - recs = make_rec_from_dict(reds[0], tot_reds) - if criteria != None: - assert len(criteria) == tot_reds, "len(criteria) must equal" + \ - " number of reds under consideration" - count = 0 - for i, data in enumerate(recs): - - plot_rec_columns(data) - plt.semilogx() - plt.ylabel('Energy') - plt.xlabel('Log Rank') - - if spp != None: - if criteria != None: - plt.title('Rank Energy Distribution for species ' + - str(spp[i])) - plt.figtext(.97, .5, 'Criteria for plot: ' + - str(criteria[i]), rotation='vertical', size=8, - horizontalalignment='center', - verticalalignment='center') - else: - plt.title('Rank Energy Distribution for species ' + - str(spp[i])) - - filename = os.path.join(folder_name, self.out_dir + - '_sed_rank_energy_' + str(spp[i]) + '_' + str(i)) - - logging.info('Saving figure ' + filename) - plt.savefig(filename) - output_form(recs[i], filename) - count += 2 - - elif spp == None: - if criteria != None: - plt.title('Criteria: ' + str(criteria[i])) - else: - plt.title('Plot number ' + str(i)) - - filename = os.path.join(folder_name, self.out_dir + - '_sed_rank_energy_' + str(i)) - logging.info('Saving figure ' + filename) - plt.savefig(filename) - output_form(recs[i], filename) - plt.clf() - - fout = open(os.path.join(folder_name, 'README'), 'w') - fout.write(readme_info_plots.format(count, count/2, count/2, - folder_name, - 'species-level energy distribution (SED) rank energy plots', - 'sed_rank_energy')) - fout.close() - -class OutputRarity(object): - ''' - This object accepts output from the Compare.compare_rarity method to - output rarity - - ''' - - def __init__(self, out_dir): - ''' - - Parameters - ---------- - out_dir : string - String appended to output directory - - ''' - - self.out_dir = out_dir - - def output_rarity(self, rarity, data_path, data, criteria=None): - ''' - Outputs csv files containing rarity measures - - Parameters - ---------- - rarity : a CompareRarity object - - data_path : str - data_path string for identifying data in csv file - - data : list - A list of observed species abundance distributions - - criteria : dict or None - The criteria for how the plot was split - - ''' - folder_name = 'rarity_values_' + self.out_dir - make_directory(folder_name) - - keys = list(rarity.viewkeys()) - dtype = [(kw, np.int) for kw in keys] - dtype.insert(0, ('criteria', 'S90')) # arbitrary length - dtype.insert(0, ('data_name', 'S90')) # arbitrary length - - # Get a list of my minimums - rare_list = [] - mins = list(rarity['observed'].viewkeys()) - for mn in mins: - rarity_array = np.empty(len(data), dtype=dtype) - rarity_array['criteria'] = criteria - nm = os.path.split(data_path)[1].split('.')[0] - rarity_array['data_name'] = np.repeat(nm, len(rarity_array)) - for kw in keys: - rarity_array[kw] = rarity[kw][mn] - rare_list.append(rarity_array) - - # Output results - count = 0 - for i, rare in enumerate(rare_list): - filename = os.path.join(folder_name, self.out_dir + '_rarity_<=_' + - str(mins[i])) - logging.info('Saving rarity data ' + filename) - output_form(rare, filename) - count += 1 - - fout = open(os.path.join(folder_name, 'README'), 'w') - fout.write(readme_info_rarity.format(folder_name, count)) - fout.close() - -def make_rec_from_dict(dist_dict, num, species=None, dt=np.float, add_rank=True): - ''' - Makes a structured/rec array from a dictionary - - Parameters - ---------- - dist_dict : dict - A dictionary with each keyword referencing a list 
of arrays - - num : int - Number of rec_arrays to return in list - - species : None or list of iterables - If not None, species should be a list of iterables that is the same - length as the list of iterables in any keyword in dist_dict. - - Returns - ------- - : structured array - - ''' - - # Check that species has the appropriate length - if species != None: - species = cp.deepcopy(species) - for val in dist_dict.itervalues(): - if len(species) != len(val): - raise TypeError('Species must contain the same number of ' + - 'iterables as each value in dist_dict') - # Sort Observed and species list - if species != None: - dist_dict['observed'], species = sort_rank_abund(dist_dict['observed'], - species) - recs = [] - names = list(dist_dict.viewkeys()) - dtype = zip(names, np.repeat(dt, len(names))) - if species != None: - dtype.insert(0, ('species', 'S40')) - if add_rank: - dtype.insert(0, ('rank', dt)) - for i in xrange(num): - temp = np.empty(len(dist_dict[names[0]][i]), dtype=dtype) - if species != None: - temp['species'] = species[i] - if add_rank: - temp['rank'] = np.arange(1,len(temp) + 1)[::-1] - for kw in dist_dict.iterkeys(): - temp[kw] = np.sort(dist_dict[kw][i]) - recs.append(temp) - return recs - -def sort_rank_abund(abund_list, spp_list): - ''' - Sorts and returns two lists based on abundance - - Parameters - ---------- - abund_list : list of arrays - - spp_list : list of arrays - - Returns - ------- - :tuple - sorted_abund, sorted_spp - - ''' - - assert len(abund_list) == len(spp_list), 'Lengths of arguments not equal' - assert np.all([len(a) == len(b) for a,b in zip(abund_list, spp_list)]),\ - 'Lengths of all corresponding iterables not equal' - abund_list = [np.array(t) for t in abund_list] - spp_list = [np.array(t) for t in spp_list] - - sorted_abund = [] - sorted_spp = [] - for i in xrange(len(abund_list)): - temp = np.array(zip(abund_list[i], spp_list[i]), dtype=[('a', - abund_list[i].dtype), ('s', spp_list[i].dtype)]) - temp_sorted = np.sort(temp, order='a') - sorted_abund.append(temp_sorted['a']) - sorted_spp.append(temp_sorted['s']) - - return sorted_abund, sorted_spp - -def plot_rec_columns(rec_array): - ''' - Function plots the columns in a rec array. 
- ''' - - # Available plotting symbols - plot_symbols = ['+', 's', 'd', '*', 'x', '8', 'H', '1', 'p', '2', '3', - '4', '|', 4, 5, 6, 7] - names = rec_array.dtype.names - legend = [] - - # If there are more arrays than symbols just change colors of lines - if len(names) > len(plot_symbols): - for nm in names: - if nm != 'species' and nm != 'rank': - if nm == 'observed': - fig = plt.plot(np.arange(1, len(rec_array) + 1), - np.sort(rec_array[nm])[::-1], '-o', - color='black') - legend.append(nm) - else: - fig = plt.plot(np.arange(1, len(rec_array) + 1), - np.sort(rec_array[nm])[::-1], '-o') - legend.append(nm) - - # Else, use different symbols/markers for each line - elif len(names) <= len(plot_symbols): - - # Counter is 0 - cnt = 0 - for nm in names: - if nm != 'species' and nm != 'rank': - if nm == 'observed': - - fig = plt.plot(np.arange(1, len(rec_array) + 1), - np.sort(rec_array[nm])[::-1], '-o', - color='black') - legend.append(nm) - else: - fig = plt.plot(np.arange(1, len(rec_array) + 1), - np.sort(rec_array[nm])[::-1], '-' + - str(plot_symbols[cnt]), - markeredgecolor='none') - legend.append(nm) - cnt += 1 - # Include ticks only on bottom and left - fig[0].axes.xaxis.tick_bottom() - fig[0].axes.yaxis.tick_left() - - plt.legend(tuple(legend), loc='best') - -def make_directory(folder_name): - '''Makes a directory named folder_name. If the directory exists it - is overwritten - - folder_name - Name of the directory - ''' - - try: - os.mkdir(folder_name) - except OSError: - shutil.rmtree(folder_name) - os.mkdir(folder_name) - From a6110e3d401e4569804c58cf6ca7276a357c7bf9 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 25 Mar 2014 15:40:17 -0700 Subject: [PATCH 137/343] Rename models file to _distributions --- macroeco/models/__init__.py | 16 ++++++++-------- macroeco/models/{models.py => _distributions.py} | 0 .../{test_models.py => test_distributions.py} | 0 3 files changed, 8 insertions(+), 8 deletions(-) rename macroeco/models/{models.py => _distributions.py} (100%) rename macroeco/models/{test_models.py => test_distributions.py} (100%) diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py index f3cf781..08ba9fa 100644 --- a/macroeco/models/__init__.py +++ b/macroeco/models/__init__.py @@ -3,13 +3,13 @@ Models (:mod:`macroeco.models`) =============================== -This module contains distributions commonly used in analysis of ecological +This module contains distributions commonly used in analysis of ecological patterns. At present, all distributions here are univariate. -Most of these distributions are subclasses of `~scipy.stats.rv_continuous` and -`~scipy.stats.rv_discrete` found in `scipy.stats`. Additionally, several of the -distribution classes here are simple wrappers for existing distributions found -in `scipy.stats` that are updated to allow the use of common ecological +Most of these distributions are subclasses of `~scipy.stats.rv_continuous` and +`~scipy.stats.rv_discrete` found in `scipy.stats`. Additionally, several of the +distribution classes here are simple wrappers for existing distributions found +in `scipy.stats` that are updated to allow the use of common ecological parameterizations. Continouous distributions @@ -32,9 +32,9 @@ nbinom .. DV: - Our public-facing distributions do not use location and scale parameters, as + Our public-facing distributions do not use location and scale parameters, as they are not common in quantitative ecology. 
""" -from models import (geom, geom_uptrunc, nbinom, - expon, expon_uptrunc) +from _distributions import (geom, geom_uptrunc, nbinom, + expon, expon_uptrunc) diff --git a/macroeco/models/models.py b/macroeco/models/_distributions.py similarity index 100% rename from macroeco/models/models.py rename to macroeco/models/_distributions.py diff --git a/macroeco/models/test_models.py b/macroeco/models/test_distributions.py similarity index 100% rename from macroeco/models/test_models.py rename to macroeco/models/test_distributions.py From 5b213b6a6399c7573df03083ba4ca20990c9e04c Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 25 Mar 2014 15:41:17 -0700 Subject: [PATCH 138/343] Finish changing name of fit2 to fit_mle --- macroeco/main/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 1894530..907734f 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -243,10 +243,10 @@ def _arg_kwarg_lists(options, module): kw_names = [] # Inspection for rv classes doesn't work since it uses args internally - # Unless method is translate_args or fit2, appends shapes to args + # Unless method is translate_args or fit_mle, appends shapes to args try: obj_meth = options['analysis'].split('.') - if obj_meth[1] not in ['fit2', 'translate_args']: + if obj_meth[1] not in ['fit_mle', 'translate_args']: arg_names += eval(module+'.'+obj_meth[0]+'.'+"shapes.split(',')") except: pass @@ -301,7 +301,7 @@ def _fit_models(options, core_results): def _get_fits(data, model): - return eval("mod.%s.fit2(data)" % model) + return eval("mod.%s.fit_mle(data)" % model) def _get_values(data, model, fits): From bae35e4a61c287e472399da2add916dc7b276265 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 25 Mar 2014 15:42:01 -0700 Subject: [PATCH 139/343] Fix bug in main that ignored subset --- macroeco/main/main.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 907734f..5b72f29 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -187,7 +187,9 @@ def _emp_extra_options(options): raise IOError, ("Path to metadata file %s is invalid." 
% metadata_path) - options['patch'] = emp.Patch(metadata_path) + subset = options.get('subset', '') + + options['patch'] = emp.Patch(metadata_path, subset) options['cols'], options['splits'] = _get_cols_splits(options) return options From 82aa07e7da2c80e9517efbfa68516ab0a3755279 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 25 Mar 2014 15:45:15 -0700 Subject: [PATCH 140/343] Language, whitespace, and import cleanup in empirical --- macroeco/empirical/empirical.py | 86 +++++++++++++++------------------ 1 file changed, 39 insertions(+), 47 deletions(-) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index ef4b073..990e7cd 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -1,15 +1,14 @@ from __future__ import division import os -import numpy as np -import pandas as pd - +import re from configparser import ConfigParser +import itertools +from copy import deepcopy from twiggy import log log = log.name('emp ') -from math import radians, cos, sin, asin, sqrt -import itertools -from copy import deepcopy +import numpy as np +import pandas as pd import scipy.spatial.distance as dist try: import shapely.geometry as geo @@ -36,7 +35,7 @@ second element is a dataframe giving the result.""" cols_note = \ - """The parameter `cols` is a dictionary with keys for four special + """The parameter ``cols`` is a dictionary with keys for four special columns and values giving the column name in the patch data table associated with each special column. @@ -51,11 +50,10 @@ count_col is used when multiple individuals of a species may be found at a single recorded location, as is the case in gridded censuses where all individuals in a quadrat are "assigned" to a single point. energy_col - and mass_col are used for energy-based metrics. - """ + and mass_col are used for energy-based metrics.""" splits_note = \ - """The parameter `splits` is a semicolon-separated string in the form of + """The parameter ``splits`` is a semicolon-separated string in the form of "column: value", where column is a name of a column in the patch data table and value is either (a) an integer giving the number of equally-spaced divisions of a column, or (b) the special keyword @@ -86,7 +84,7 @@ class Patch(object): Table of census data recorded in patch meta : ConfigParser obj Object similar to dict describing data table, loaded from metadata file - at metadata_path + at metadata_path and processed by subset subset : str Subset string passed as parameter @@ -102,8 +100,8 @@ class Patch(object): For csv data files, subset is a semicolon-separated string describing subset operations. For example, the string "year==2005; x>20; x<40; spp=='cabr'" loads a data table containing only records for which the year - is 2005, x values are between 20 and 40, and species 'cabr'. Note that for - categorical columns, the value of the column must be enclosed in single + is 2005, x values are between 20 and 40, and species is 'cabr'. Note that + for categorical columns, the value of the column must be enclosed in single quotes. 
For sql/db files, subset is a SQL query string that selects the data from @@ -117,11 +115,10 @@ def __init__(self, metadata_path, subset=''): self.meta.read(metadata_path) self.subset = subset self.table = self._load_table(metadata_path, - self.meta['Description']['datapath'], - subset) + self.meta['Description']['datapath']) - def _load_table(self, metadata_path, relative_data_path, subset): + def _load_table(self, metadata_path, data_path): """ Load data table, taking subset if needed @@ -129,10 +126,8 @@ def _load_table(self, metadata_path, relative_data_path, subset): ---------- metadata_path : str Path to metadata file - relative_data_path : str - Path to data file from location of metadata file - subset : str - String describing subset of data to use for analysis + data_path : str + Path to data file, absolute or relative to metadata file Returns ------- @@ -142,21 +137,22 @@ def _load_table(self, metadata_path, relative_data_path, subset): """ metadata_dir = os.path.dirname(metadata_path) - data_path = os.path.normpath(os.path.join(metadata_dir, - relative_data_path)) - type = data_path.split('.')[-1] + data_path = os.path.normpath(os.path.join(metadata_dir, data_path)) - if type == 'csv': + extension = data_path.split('.')[-1] + + if extension == 'csv': full_table = pd.read_csv(data_path) - table = _subset_table(full_table, subset) - elif type in ['db', 'sql']: - table = self._get_db_table(data_path, type, subset) + table = _subset_table(full_table, self.subset) + self.meta = _subset_meta(self.meta, self.subset) + elif extension in ['db', 'sql']: + table = self._get_db_table(data_path, extension) else: - raise TypeError('Cannot process file of type %s' % type) + raise TypeError('Cannot process file of type %s' % extension) return table - def _get_db_table(self, data_path, type): + def _get_db_table(self, data_path, extension): """ Query a database and return query result as a recarray @@ -164,7 +160,7 @@ def _get_db_table(self, data_path, type): ---------- data_path : str Path to the database file - type : str + extension : str Type of database, either sql or db Returns @@ -176,7 +172,7 @@ def _get_db_table(self, data_path, type): # TODO: This is probably broken # Load table - if type == 'sql': + if extension == 'sql': con = lite.connect(':memory:') con.row_factory = lite.Row cur = con.cursor() @@ -237,6 +233,7 @@ def _subset_table(full_table, subset): if not subset: return full_table + # TODO: Figure out in syntax for logical or conditions = subset.replace(' ','').split(';') valid = np.ones(len(full_table), dtype=bool) @@ -262,14 +259,13 @@ def sad(patch, cols, splits='', clean=True): Returns ------- - {1} - Result has two columns: spp (species identifier) and y - (individuals of that species). - + {1} Result has two columns: spp (species identifier) and y (individuals of + that species). Notes ----- {2} + {3} """ @@ -298,7 +294,7 @@ def sad(patch, cols, splits='', clean=True): if clean: subdf = subdf[subdf['y'] > 0] - # Append split result + # Append subset result result_list.append((substring, subdf)) # Return all results @@ -317,13 +313,13 @@ def ssad(patch, cols, splits=''): Returns ------- - {1} - Result has one column: y (individuals of species in each subplot). - + {1} Result has one column giving the individuals of species in each + subplot. 
Notes ----- {2} + {3} """ @@ -331,10 +327,6 @@ def ssad(patch, cols, splits=''): # Get and check SAD sad_results = sad(patch, cols, splits, clean=False) - if len(sad_results) == 1: - raise ValueError, ("SSAD requires patch to be split into more than " - "one subplot") - # Create dataframe with col for spp name and numbered col for each split for i, sad_result in enumerate(sad_results): if i == 0: # For first result, create dataframe @@ -1080,13 +1072,13 @@ def decdeg_distance(pt1, pt2): lat2, lon2 = pt2 # Convert decimal degrees to radians - lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2]) + lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2]) # haversine formula dlon = lon2 - lon1 dlat = lat2 - lat1 - a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2 - c = 2 * asin(sqrt(a)) + a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2 + c = 2 * np.asin(np.sqrt(a)) km = 6367 * c return km @@ -1167,7 +1159,7 @@ def _yield_subtables(patch, splits): if splits: subset_list = _parse_splits(patch, splits) for subset in subset_list: - log.info('Analyzing split: %s' % subset) + log.info('Analyzing subset: %s' % subset) yield subset, _subset_table(patch.table, subset) else: yield '', patch.table From 0e9954e027343963c1ed0259aef9029dcaf9068e Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 25 Mar 2014 15:45:37 -0700 Subject: [PATCH 141/343] Add subsetting of metadata and step metadata parameter --- macroeco/empirical/empirical.py | 56 +++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index 990e7cd..ce4234a 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -94,6 +94,11 @@ class Patch(object): consisting only of letters and numbers, with no spaces or other special characters. + The meta attribute of this object is processed to reflect the value of + subset. If columns with a min and a max are included in the subset string, + the min and max values for that column in meta will be updated to reflect + the specified limits. + The parameter subset takes different forms depending on whether the data file described by the metadata is a csv or a sql/db file. @@ -243,6 +248,52 @@ def _subset_table(full_table, subset): return full_table[valid] +def _subset_meta(full_meta, subset): + """ + Return subtable matching all conditions in subset. 
+ + Parameters + ---------- + full_meta : ConfigParser obj + Metadata object + subset : str + String describing subset of data to use for analysis + + Returns + ------- + ConfigParser obj + Updated version of full_meta accounting for subset string + + """ + if not subset: + return full_meta + + conditions = subset.replace(' ','').split(';') + + for condition in conditions: + condition_list = re.split('[<>=]', condition) + col = condition_list[0] + val = condition_list[-1] + col_step = full_meta[col]['step'] + operator = re.sub('[^<>=]', '', condition) + + if operator == '==': + full_meta[col]['min'] = val + full_meta[col]['max'] = val + elif operator == '>=': + full_meta[col]['min'] = val + elif operator == '>': + full_meta[col]['min'] = str(eval(val) + eval(col_step)) + elif operator == '<=': + full_meta[col]['max'] = val + elif operator == '<': + full_meta[col]['max'] = str(eval(val) - eval(col_step)) + else: + raise ValueError, "Subset %s not valid" % condition + + return full_meta + + @log_start_end @doc_sub(metric_params, metric_return, cols_note, splits_note) def sad(patch, cols, splits='', clean=True): @@ -1198,10 +1249,11 @@ def _parse_splits(patch, splits): level_list = [col + '==' + str(x) + ';' for x in np.unique(patch.table[col])] else: + col_step = eval(patch.meta[col]['step']) # eval converts to float col_min = eval(patch.meta[col]['min']) col_max = eval(patch.meta[col]['max']) - step = (col_max - col_min) / eval(val) - starts = np.arange(col_min, col_max, step) + step = (col_max - col_min + col_step) / eval(val) + starts = np.arange(col_min, col_max + col_step, step) ends = starts + step level_list = [col + '>=' + str(x) + '; ' + col + '<' + str(y)+'; ' for x, y in zip(starts, ends)] From 03277503f687903551669ac3d1f57fd854371798 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 25 Mar 2014 18:56:16 -0700 Subject: [PATCH 142/343] First review of compare --- macroeco/compare/__init__.py | 4 +- macroeco/compare/compare.py | 203 +++++++++++------------------------ 2 files changed, 68 insertions(+), 139 deletions(-) diff --git a/macroeco/compare/__init__.py b/macroeco/compare/__init__.py index f9fb864..db0fd25 100644 --- a/macroeco/compare/__init__.py +++ b/macroeco/compare/__init__.py @@ -26,4 +26,6 @@ """ -from .compare import * +from .compare import (AIC, AICC, AIC_weights, nll, lrt, + sum_of_squares, r_squared, chi_squared, + empirical_cdf, bin_data) diff --git a/macroeco/compare/compare.py b/macroeco/compare/compare.py index 3316452..eae2ea1 100644 --- a/macroeco/compare/compare.py +++ b/macroeco/compare/compare.py @@ -6,16 +6,68 @@ import pandas as pd -def nll(values): +def nll(data, model): """ - Calculate negative log likelihood from an array of pdf/pmf values. + Calculate negative log likelihood from a set of pdf/pmf values + + Parameters + ---------- + x : iterable + pmf/pdf values + + Returns + ------- + array + Negative log likelihood """ - values = _to_arrays(values)[0] - return -np.sum(np.log(values)) + x = _to_arrays(x)[0] + return -np.sum(np.log(x)) -def AIC(values, params): +def lrt(data, model_null, model_alt): + """ + This functions compares two nested models using the likelihood ratio + test. + + Parameters + ---------- + nll_null : float + The negative log-likelihood of the null model + nll_alt : float + The negative log-likelihood of the alternative model + df_list : int + the degrees of freedom calculated as (number of free parameters in + alternative model) - (number of free parameters in null model). 
+ Alternatively, the number of additional parameters in the alternative + model. + + Returns + ------- + : tuple + (test_statistic, p-value) + + Notes + ----- + + Interpretation: p-value < alpha suggests signficant evidence for your + alternative model + + The LRT only applies to nested models. The variable test_stat is known as + the G^2 statistic. The G-test uses the fact that -2log(Likelihood_null / + Likelihood_alt) is approximately chi-squared. This assumption breaks down + for small samples sizes. + + """ + + # Calculate G^2 statistic + ll_null = nll_null * -1 + ll_alt = nll_alt * -1 + test_stat = -2 * (ll_null - ll_alt) + return (test_stat, stats.chisqprob(test_stat, df)) + + +def AIC(data, model, params=None, corrected=True): """ Calculate AIC given values of a pdf/pmf and a set of model parameters. """ @@ -25,7 +77,6 @@ def AIC(values, params): return 2 * k + 2 * L -def AICC(values, params): """ Calculate AICC given values of a pdf/pmf and a set of model parameters. @@ -48,7 +99,7 @@ def AICC(values, params): return AIC(values, params) + (2 * k * (k + 1)) / (n - k - 1) -def AIC_weights(aic_values): +def AIC_weights(aic_list): """ Calculates the aic_weights for a given set of models. @@ -78,79 +129,16 @@ def AIC_weights(aic_values): return weights, delta -def empirical_cdf(data): - """ - Generates an empirical cdf from empirical data - - Parameters - ---------- - data : array-like object - Empirical data - - Returns - -------- - : array - The empirical cdf corresponding to the inputted data - - """ - # TODO: This should return sorted data also, otherwise trying to match the - # input data to output does not correspond (result is sorted, data is not - # necessarily). - - vals = pd.Series(data).value_counts() - ecdf = pd.DataFrame(data).set_index(keys=0) - probs = pd.DataFrame(vals.sort_index().cumsum() / np.float(len(data))) - ecdf = ecdf.join(probs) - - return np.array(ecdf[0]) - - -class gen_loss_function(object): - """ - Generic class for loss function between observed and predicted data - - """ - - def __init__(self, loss_fxn_str): - """ - Parameters - ---------- - loss_fxn_str : string - A Python string representing the loss function between observed - (obs) and predicted (pred). - - Notes - ----- - - Ex. 'np.abs(obs - pred)' or '(obs - pred)**2' - - """ - self.loss_fxn = loss_fxn_str - - def total_loss(self, obs, pred): - """ - Total loss for observed and predicted - - Parameters - ---------- - obs, pred : array-like objects - observed and predicted data - - Returns - ------- - : float - The sum of the loss function - """ +def bayes_factor(): + pass - obs, pred = _to_arrays(obs, pred) - return np.sum(eval(self.loss_fxn)) -sum_of_squares = gen_loss_function('(obs - pred)**2').total_loss +def sum_of_squares(obs, pred): + return np.sum((np.array(obs) - np.array(pred))**2) -def r_squared(obs, pred): +def r_squared(obs, pred, one_to_one=False): """ - Get's the R^2 value for a regression of observed data (X) and predicted (Y) Parameters @@ -163,73 +151,12 @@ def r_squared(obs, pred): The R**2 value for the regression of observed on predicted """ - + # TODO: Add one_to_one b0, b1, r, p_value, se = stats.linregress(obs, pred) return r ** 2 -def ks_two_sample(): - """ - Two sample Kolmogorov Smirnov distribution. Uses the cumulative - distribution functions to test whether two samples were drawn from the same - continuous distribution. Can be a decent approxmiation for discrete data - (CHECK THIS), but the chi-squared test may be more appropriate. 
- - """ - - pass - - -def ks_one_sample(): - pass - - -def lrt(nll_null, nll_alt, df): - """ - This functions compares two nested models using the likelihood ratio - test. - - Parameters - ---------- - nll_null : float - The negative log-likelihood of the null model - nll_alt : float - The negative log-likelihood of the alternative model - df_list : int - the degrees of freedom calculated as (number of free parameters in - alternative model) - (number of free parameters in null model). - Alternatively, the number of additional parameters in the alternative - model. - - Returns - ------- - : tuple - (test_statistic, p-value) - - Notes - ----- - - Interpretation: p-value < alpha suggests signficant evidence for your - alternative model - - The LRT only applies to nested models. The variable test_stat is known as - the G^2 statistic. The G-test uses the fact that -2log(Likelihood_null / - Likelihood_alt) is approximately chi-squared. This assumption breaks down - for small samples sizes. - - """ - - # Calculate G^2 statistic - ll_null = nll_null * -1 - ll_alt = nll_alt * -1 - test_stat = -2 * (ll_null - ll_alt) - return (test_stat, stats.chisqprob(test_stat, df)) - -def bayes_factor(): - pass - - -def chi_squared(dists): +def chi_squared(x1, x2, bin_type='linear'): """ Chi-squared test to compare two or more distributions. From ec893a42bd5e0cf92b956b5d94c8d11be1aadd78 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 25 Mar 2014 18:57:15 -0700 Subject: [PATCH 143/343] Move empirical_cdf to empirical module --- macroeco/empirical/empirical.py | 29 +++++++++++++++++++++++++++++ macroeco/main/main.py | 2 +- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index ce4234a..f46e37b 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -1276,3 +1276,32 @@ def _product(*args, **kwds): for pool in pools: result = [x+[y] for x in result for y in pool] return result + + + + +def empirical_cdf(data): + """ + Generates an empirical cdf from empirical data + + Parameters + ---------- + data : array-like object + Empirical data + + Returns + -------- + : array + The empirical cdf corresponding to the inputted data + + """ + # TODO: This should return sorted data also, otherwise trying to match the + # input data to output does not correspond (result is sorted, data is not + # necessarily). + + vals = pd.Series(data).value_counts() + ecdf = pd.DataFrame(data).set_index(keys=0) + probs = pd.DataFrame(vals.sort_index().cumsum() / np.float(len(data))) + ecdf = ecdf.join(probs) + + return np.array(ecdf[0]) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 5b72f29..8037950 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -470,7 +470,7 @@ def calc_func(model, df, shapes): # CDF x = core_result['y'].values - emp_cdf = comp.empirical_cdf(x) + emp_cdf = emp.empirical_cdf(x) df = pd.DataFrame({'x': x, 'empirical': emp_cdf}) def calc_func(model, df, shapes): From 75f1e655bb0687343e0dc7526712a5d9b46c92c9 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 25 Mar 2014 19:47:39 -0700 Subject: [PATCH 144/343] Clean up init for compare --- macroeco/compare/__init__.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/macroeco/compare/__init__.py b/macroeco/compare/__init__.py index db0fd25..e3dc66a 100644 --- a/macroeco/compare/__init__.py +++ b/macroeco/compare/__init__.py @@ -13,19 +13,16 @@ .. 
autosummary:: :toctree: generated/ + nll + lrt AIC - AICC AIC_weights - nll - empirical_cdf sum_of_squares r_squared - chi_squared - lrt bin_data """ -from .compare import (AIC, AICC, AIC_weights, nll, lrt, - sum_of_squares, r_squared, chi_squared, - empirical_cdf, bin_data) +from .compare import (nll, lrt, AIC, AIC_weights, + sum_of_squares, r_squared, + bin_data) From 47e5c5b0ce6de3a0da764949ef9be678a556b798 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 25 Mar 2014 19:47:55 -0700 Subject: [PATCH 145/343] Provide flat_output option in main --- macroeco/main/main.py | 65 ++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 8037950..e415b96 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -23,7 +23,7 @@ from .. import compare as comp -def main(param_path='parameters.txt'): +def main(param_path='parameters.txt', flat_output=False): """ Entry point function for analysis based on parameter files. @@ -31,24 +31,40 @@ def main(param_path='parameters.txt'): ---------- param_path : str Path to user-generated parameter file + flat_output : bool + Place all output in parameter directory instead of results + subdir. Default False. Only allowed if single run in parameters file. """ - # Confirm parameters file is present and extract dir + # Confirm parameters file is present if not os.path.isfile(param_path): raise IOError, "Parameter file not found at %s" % param_path - param_dir = os.path.abspath(os.path.dirname(param_path)) - # Setup results_dir, remove if present - results_dir = os.path.join(param_dir, 'results') - if os.path.isdir(results_dir): - shutil.rmtree(results_dir) - os.makedirs(results_dir) + # Get raw params and base options (non-run-dependent options) + params, base_options = _get_params_base_options(param_path, flat_output) + + # Confirm that flat_output is allowed + if flat_output and len(base_options['run_names']) > 1: + raise ValueError, "flat_output option only possible with a single run" - # Get logger and announce start - log = setup_log(results_dir) + # Start logging + log = setup_log(base_options['results_dir']) log.info('Starting analysis') + # Do analysis for each run + for run_name in base_options['run_names']: + log.info('Starting run %s' % run_name) + options = dict(params[run_name]) # All parameters from this run + options.update(base_options) # Add base parameters + options['run_dir'] = os.path.join(base_options['results_dir'],run_name) + _do_analysis(options) + log.info('Finished run %s' % run_name) + log.info('Finished analysis successfully') + + +def _get_params_base_options(param_path, flat_output): + # Read parameter file into params object params = configparser.ConfigParser() try: @@ -56,16 +72,25 @@ def main(param_path='parameters.txt'): except: raise ValueError, "Parameter file is invalid" - # Do analysis for each run with options dict (params + addl options) - run_names = params.sections() - for run_name in run_names: - log.info('Starting run %s' % run_name) - options = dict(params[run_name]) - options['param_dir'] = param_dir - options['run_dir'] = os.path.join(results_dir, run_name) - _do_analysis(options) - log.info('Finished run %s' % run_name) - log.info('Finished analysis successfully') + # Setup param_dir and results_dir, get run_names + param_dir = os.path.abspath(os.path.dirname(param_path)) + if flat_output: + results_dir = param_dir + run_names = [''] + else: + results_dir = os.path.join(param_dir, 'results') + if 
os.path.isdir(results_dir): + shutil.rmtree(results_dir) + os.makedirs(results_dir) + run_names = params.sections() + + # Create options dict + base_options = {} + base_options['param_dir'] = param_dir + base_options['results_dir'] = results_dir + base_options['run_names'] = run_names + + return params, base_options def _do_analysis(options): From fd7c438169eba17797c259249bcdd8d238ca0f29 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 26 Mar 2014 14:37:56 -0700 Subject: [PATCH 146/343] Rewrote compare.py in model, data or obs, pred format --- macroeco/compare/compare.py | 230 ++++++++++++++++++++++-------------- 1 file changed, 141 insertions(+), 89 deletions(-) diff --git a/macroeco/compare/compare.py b/macroeco/compare/compare.py index eae2ea1..0648597 100644 --- a/macroeco/compare/compare.py +++ b/macroeco/compare/compare.py @@ -5,47 +5,71 @@ import scipy.stats as stats import pandas as pd +from ..misc import doc_sub +_data_doc = """data : array-like + data from which to caculate the the likelihood""" + +_model_doc = """model : scipy distribution object + A frozen scipy model object. Needs to have the attribute *.shape""" + +_obs_pred_doc = """obs, pred : array-like objects + Observed and predicted data""" + + +@doc_sub(_data_doc, _model_doc) def nll(data, model): """ - Calculate negative log likelihood from a set of pdf/pmf values + Calculate the neagtive log likelihood given data and a model Parameters ---------- - x : iterable - pmf/pdf values + {0} + + {1} Returns ------- - array + float Negative log likelihood + """ - x = _to_arrays(x)[0] - return -np.sum(np.log(x)) + try: + log_lik_vals = model.logpmf(data) + except: + log_lik_vals = model.logpdf(data) + return -np.sum(log_lik_vals) -def lrt(data, model_null, model_alt): +@doc_sub(_data_doc) +def lrt(data, model_null, model_alt, df=None): """ This functions compares two nested models using the likelihood ratio test. Parameters ---------- - nll_null : float - The negative log-likelihood of the null model - nll_alt : float - The negative log-likelihood of the alternative model - df_list : int - the degrees of freedom calculated as (number of free parameters in - alternative model) - (number of free parameters in null model). - Alternatively, the number of additional parameters in the alternative - model. + {0} + + model_null : scipy distribution object + The null model as a frozen scipy distribution object. Parameters of + distribution must be given as keyword arguments. + Ex. ``norm = stats.norm(loc=0, scale=1)`` + + model_alt : scipy distribution object + The alternative model as a a frozen scipy distribution object. + + df : int + Optional. Specify the degrees of freedom for the lrt. Calculated + as the number of parameters in model_alt - number of parameters in + model_null. If None, the df is calculated from the model + objects. Returns ------- - : tuple - (test_statistic, p-value) + tuple + (G^2 statistic, p-value) Notes ----- @@ -61,28 +85,44 @@ def lrt(data, model_null, model_alt): """ # Calculate G^2 statistic - ll_null = nll_null * -1 - ll_alt = nll_alt * -1 + ll_null = nll(data, model_null) * -1 + ll_alt = nll(data, model_alt) * -1 test_stat = -2 * (ll_null - ll_alt) + + # Set df if necessary + if not df: + df = len(model_alt.kwds) - len(model_null.kwds) + return (test_stat, stats.chisqprob(test_stat, df)) +@doc_sub(_data_doc, _model_doc) def AIC(data, model, params=None, corrected=True): """ - Calculate AIC given values of a pdf/pmf and a set of model parameters. 
- """ - values, params = _to_arrays(values, params) - k = len(params) # Num parameters - L = nll(values) - return 2 * k + 2 * L + Calculate AIC given values of a model given data and model parameters + Parameters + ---------- + {0} - """ - Calculate AICC given values of a pdf/pmf and a set of model parameters. + {1} + + params : int + The number of parameters in the model. If None, calculates the number + of parameters from the distribution object + + corrected : bool + If True, calculates the corrected AICC, if False calculates the + uncorrected AIC. + + Returns + ------- + float + AIC(C) value Notes ----- - Should be used when the number of observations is < 40. + AICC should be used when the number of observations is < 40. References ---------- @@ -92,25 +132,34 @@ def AIC(data, model, params=None, corrected=True): York City, USA: Springer. """ + n = len(data) # Number of observations + L = nll(data, model) - values, params = _to_arrays(values, params) - k = len(params) # Num parameters - n = len(values) # Num observations - return AIC(values, params) + (2 * k * (k + 1)) / (n - k - 1) + if not params: + k = len(model.kwds) + else: + k = params + + if corrected: + aic_value = 2 * k + 2 * L + (2 * k * (k + 1)) / (n - k - 1) + else: + aic_value = 2 * k + 2 * L + + return aic_value def AIC_weights(aic_list): """ - Calculates the aic_weights for a given set of models. + Calculates the AIC weights for a given set of models. Parameters ----------------- - aic_values : array-like object + aic_list : array-like object Array-like object containing AIC values from different models Returns ------------- - (weights, delta) : tuple + tuple First element contains the relative AIC weights, second element contains the delta AIC values. @@ -120,7 +169,7 @@ def AIC_weights(aic_list): best model in comparison to the other models """ - aic_values = _to_arrays(aic_values)[0] + aic_values = np.array(aic_list) minimum = np.min(aic_values) delta = aic_values - minimum values = np.exp(-delta / 2) @@ -129,76 +178,79 @@ def AIC_weights(aic_list): return weights, delta -def bayes_factor(): - pass - - +@doc_sub(_obs_pred_doc) def sum_of_squares(obs, pred): - return np.sum((np.array(obs) - np.array(pred))**2) - - -def r_squared(obs, pred, one_to_one=False): """ - Get's the R^2 value for a regression of observed data (X) and predicted (Y) + Calculates the sum of squares between observed (X) and predicted (Y) data. + Attempts to braodcast arrays if lengths don't match. Parameters ---------- - obs, pred : array-like objects + {0} Returns ------- - : float - The R**2 value for the regression of observed on predicted - + float + Sum of squares """ - # TODO: Add one_to_one - b0, b1, r, p_value, se = stats.linregress(obs, pred) - return r ** 2 + obs, pred = tuple(np.broadcast_arrays(obs, pred)) + return np.sum((np.array(obs) - np.array(pred)) ** 2) -def chi_squared(x1, x2, bin_type='linear'): +@doc_sub(_obs_pred_doc) +def r_squared(obs, pred, one_to_one=False, log_trans=True): """ - Chi-squared test to compare two or more distributions. + Get's the R^2 value for a regression of observed (X) and predicted (Y) + data Parameters - ------------------ - dists : list - List of distributions to compare. Each distribution in list should be - the same length and the location of each value in a list should be - compareable. This list will be made into a Chi-Squared contingency - table to analyze. + ---------- + {0} + + one_to_one : bool + If True, calculates the R^2 based on the one-to-one line as done in + [#]_. 
If False, calculates the standard R^2 from a regression fit. + + log_trans : bool + If True, log transforms obs and pred. Returns - ------------ - chi2 : float - The test statistic. - p : float - The p-value of the test - dof : int - Degrees of freedom - expected : ndarray, same shape as `observed` - The expected frequencies, based on the marginal sums of the table. + ------- + float + R^2 value Notes - --------- - Assumption of the Chi-squared test is that the expected value of 80% of - the cells is > 5. If this does not hold, the Normal approximation is not - valid and you should try an alternative approach. + ----- + Using just R^2 to compare the fit of observed and predicted values can be + misleading because the relationship may not be one-to-one but the R^2 + value may be quite high. The one-to-one option alleviates this problem. + + References + ---------- + .. [#] + White, E., Thibault, K., & Xiao, X. (2012). Characterizing the species + abundance distributions across taxa and ecosystems using a simple + maximum entropy model. Ecology, 93(8), 1772-8 - If all of the cells in a column contain zero and error will because teh - expected value of the cell is 0. """ - assert len(dists) > 1, "Length of dists must be greater than 1" - test_len = len(dists[0]) - assert np.all([len(dt) == test_len for dt in dists]), \ - "All dists must have equal length" + # Sort obs and pred + obs = np.sort(obs) + pred = np.sort(pred) + + if log_trans: + obs = np.log(obs) + pred = np.log(pred) - chi_table = np.array(dists, dtype=np.float) - chi2, p, dof, expected = stats.chi2_contingency(chi_table, - correction=False) + if one_to_one: + # Equation from White et al 2012 + r_sq = 1 - sum_of_squares(obs, pred) / \ + sum_of_squares(obs, np.mean(obs)) + else: + b0, b1, r, p_value, se = stats.linregress(obs, pred) + r_sq = r ** 2 - return chi2, p, dof, expected + return r_sq def bin_data(data, max_num): @@ -208,7 +260,7 @@ def bin_data(data, max_num): not split between bins. Parameters - ------------------ + ---------- data : array-like Data to be binned @@ -216,12 +268,12 @@ def bin_data(data, max_num): The maximum upper most boundary of the data Returns - ------------ - tuple : (binned_data, bin_edges) + ------- + tuple + (binned_data, bin_edges) References - ----------------- - + ---------- .. [#] Preston, F. (1962). The canonical distribution of commonness and rarity. 
Ecology, 43, 185-215 From 5292b882ee892af52b3cb29a7ae5019d690e397c Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 26 Mar 2014 14:38:24 -0700 Subject: [PATCH 147/343] Began unit tests for compare.py --- macroeco/compare/test_compare.py | 318 ++++++++++++------------------- 1 file changed, 118 insertions(+), 200 deletions(-) diff --git a/macroeco/compare/test_compare.py b/macroeco/compare/test_compare.py index 99cc09b..1531cf5 100644 --- a/macroeco/compare/test_compare.py +++ b/macroeco/compare/test_compare.py @@ -5,7 +5,7 @@ """ from __future__ import division -from numpy.testing import (TestCase, assert_equal, assert_array_equal, +from numpy.testing import (TestCase, assert_equal, assert_array_equal, assert_almost_equal, assert_array_almost_equal, assert_allclose, assert_, assert_raises) @@ -15,128 +15,124 @@ import numpy.testing as nt -class TestCompare(TestCase): - '''Test Methods in compare.py''' +class TestNLL(TestCase): + '''Test NLL in compare''' def test_nll(self): - + # Test against R result: sum(dnorm(c(1,2,3,4,5), log=TRUE)) R_res = 32.09469 - test_vals = stats.norm.pdf((1, 2, 3, 4, 5)) - lglk = nll(test_vals) - assert_equal(R_res, np.round(lglk, decimals=5)) - - def test_empirical_cdf(self): - - #Test against R's ecdf function - - # Test Case 1 - test_data = [1, 1, 1, 1, 2, 3, 4, 5, 6, 6] - R_res = [.4, .4, .4, .4, .5, .6, .7, .8, 1, 1] - res = empirical_cdf(test_data) - assert_array_equal(R_res, res) - - # Test Case 2 - test_data = [3, 3, 3, 3] - R_res = [1, 1, 1, 1] - res = empirical_cdf(test_data) - assert_array_equal(R_res, res) - - def test_aic(self): - - test_vals = stats.norm.pdf((1, 2, 3, 4, 5, 6, 7, 8)) - aic1 = AIC(test_vals, (1, 1)) - expected = 222.703016531 # Calculated by hand - assert_equal(np.round(aic1, decimals=9), expected) + data = np.array([1, 2, 3, 4, 5]) + model = stats.norm(loc=0, scale=1) + lglk = nll(data, model) + assert_almost_equal(R_res, lglk, decimal=5) + + +class TestAIC(TestCase): + """Test AIC function""" - test_vals = stats.gamma.pdf((1, 1, 1, 4, 5, 7, 12), 2) - aic1 = AIC(test_vals, (1, 1)) - expected = 51.146902 - assert_equal(np.round(aic1, decimals=6), expected) + def test_aic_basic(self): + """Testing basic functionality of AIC""" + + # Test case 1 + model = stats.norm(loc=0, scale=1) + data = np.arange(1, 9) + aic1 = AIC(data, model, corrected=False) + expected = 222.703016531 # Calculated by hand + assert_almost_equal(aic1, expected) + + # Test case 2 + model = stats.gamma(a=2) + data = [1, 1, 1, 2, 4, 5, 7, 12] + aic1 = AIC(data, model, corrected=False) + expected = 51.760607494 + assert_almost_equal(aic1, expected, decimal=6) + + # Test case 3 + model = stats.gamma(a=2, loc=0) + aic1 = AIC(data, model, corrected=False) + expected = 53.760607494 + assert_almost_equal(aic1, expected, decimal=6) + + def test_aic_given_params(self): + """ Test AIC if params are given """ + + # Test case 1 + model = stats.norm() + data = np.arange(1, 9) + aic1 = AIC(data, model, corrected=False, params=2) + + # statsmodel.tools.eval_measures.aic: aic(L, 8, 2) + expected = 222.703016531 + assert_almost_equal(aic1, expected) + + # Test case 2 + model = stats.gamma(2) + data = [1, 1, 1, 2, 4, 5, 7, 12] + aic1 = AIC(data, model, corrected=False, params=1) + + # statsmodel.tools.eval_measures.aic: aic(L, 8, 1) + expected = 51.760607494 + assert_almost_equal(aic1, expected, decimal=6) + + # Test case 3 + model = stats.gamma(2, 0) + aic1 = AIC(data, model, corrected=False, params=2) + + # statsmodel.tools.eval_measures.aic: aic(L, 8, 2) + expected = 
53.760607494 + assert_almost_equal(aic1, expected, decimal=6) def test_aicc(self): - + """ Test AICC gives expected results""" + # Test values - test_vals = stats.norm.pdf((1, 2, 3, 4, 5, 6, 7, 8)) - aic1 = AICC(test_vals, (1, 1)) + model = stats.norm() + data = np.arange(1, 9) + aic1 = AIC(data, model, corrected=True, params=2) + expected = 225.10302 # Calculated by hand + assert_almost_equal(expected, aic1, decimal=5) - # Test that aicc gives the correct values - expected = 225.10302 - assert_equal(expected, np.round(aic1, decimals=5)) + +class TestAICWeights(TestCase): def test_aic_weights(self): - + # Test values - vals = [1, 1, 1, 2, 3, 4, 7, 23, 78] - values = [stats.norm.pdf(vals, scale=100), stats.norm.pdf(vals, - scale=99)] + data = [1, 1, 1, 2, 3, 4, 7, 23, 78] + models = [stats.norm(scale=100), stats.norm(scale=99)] + aic_vals = [AIC(data, tm) for tm in models] - aic_vals = [AICC(tval, 1) for tval in values] aicw, delta_aic = AIC_weights(aic_vals) + + # Calculated by hand pred = np.array([0.47909787, 0.52090213]) assert_array_almost_equal(aicw, pred) - def test_gen_loss_function(self): - - # Test absolute value loss function - loss_fxn = 'np.abs(obs - pred)' - loss = gen_loss_function(loss_fxn) - - obs = np.random.randint(3, 59, 100) - pred = np.random.randint(3, 59, 100) - test_loss = np.sum(np.abs(obs - pred)) - - pred_loss = loss.total_loss(obs, pred) - assert_equal(pred_loss, test_loss) - - # Test sum of squares loss function - test_loss = np.sum((obs - pred) ** 2) - pred_loss = sum_of_squares(obs, pred) - assert_equal(test_loss, pred_loss) - - # Test MSE loss function - loss_fxn = 'np.abs(obs - pred) / len(obs)' - loss = gen_loss_function(loss_fxn) - test_loss = np.sum(np.abs(obs - pred) / len(obs)) - pred_loss = loss.total_loss(obs, pred) - assert_equal(test_loss, pred_loss) +class TestRsquared(TestCase): - def test_r_squared(self): + def test_basic_r_squared(self): # Already unittested in scipy. Checking for functionaliity test_data = np.random.randint(5, 100, 100) rsq = r_squared(test_data, test_data) assert_equal(rsq, 1) - def test_chi_squared(self): + def test_one_to_one_rsq(self): - # Compare two distributions - # Chi squared function itself is already unittested in scipy - - bin_max = 16 - p = 0.99 - dist1 = stats.logser(p=p).rvs(100) - dist2 = stats.logser(p=p).rvs(100) - - bin1 = bin_data(dist1, np.max(bin_max))[0] - bin2 = bin_data(dist2, np.max(bin_max))[0] - - res = chi_squared([bin1, bin2]) - - # Check three distributions - dist3 = stats.logser(p=p).rvs(100) - bin3 = bin_data(dist3, np.max(bin_max))[0] + # Identical data should lead to an R^2 of 1 + test_data = np.random.randint(5, 100, 100) + rsq = r_squared(test_data, test_data, one_to_one=True) + assert_equal(rsq, 1) - res = chi_squared([bin1, bin2, bin3]) + # Test against R^2 from fixed slope linear regression in R + # Calculate by hand? 
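        # A minimal hand-checkable sketch, assuming the one-to-one form of
        # r_squared rewritten above (sorted inputs, log_trans switched off):
        #     obs = np.array([1., 2., 3., 4.])
        #     pred = np.array([1., 2., 3., 5.])
        #     ss_res = np.sum((obs - pred) ** 2)           # 1.0
        #     ss_tot = np.sum((obs - np.mean(obs)) ** 2)   # 5.0
        #     expected = 1 - ss_res / ss_tot               # 0.8
        #     assert_almost_equal(
        #         r_squared(obs, pred, one_to_one=True, log_trans=False),
        #         expected)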
- # Check error is thrown with only one dist - assert_raises(AssertionError, chi_squared, [bin1]) - # Check error is thrown if bins are different lengths - assert_raises(AssertionError, chi_squared, [bin1, bin2[:-1]]) +class TestBinData(TestCase): - def test_bin_data(self): + def test_bin_data_functionality(self): # Test against R's vegan prestonfit: prestonfit(data, tiesplit=FALSE) # Note that vegan drops the bins with 0 values @@ -151,6 +147,7 @@ def test_bin_data(self): test_res = bin_data(data, max(data))[0] assert_array_equal(test_res, vegan) + def test_bin_data_boundary(self): # Test boundary condition data = np.array([1, 2]) vegan = np.array([1, 1], dtype=np.float) @@ -167,110 +164,31 @@ def test_bin_data(self): test_res = bin_data(data, max(data))[0] assert_array_equal(test_res, vegan) - def test_lrt(self): - - # Test against what the lrtest() R function returns - model1 = 158.0494 - model0 = 139.806 - R_chisquare = 36.4868 - R_p = 1.537e-09 - - pred_chi, pred_p = lrt(model1, model0, 1) - - assert_almost_equal(pred_chi, R_chisquare) - assert_almost_equal(pred_p, R_p) - -# -# -# def test_ks_two_sample(self): -# # Unittested in scipy, testing that this function works -# -# d, p = ks_two_sample([1,1,2,3,4,5,6,12], [1,2,3,4,5,5,5,5,5,7,8,9]) -# -# def test_likelihood_ratio(self): -# -# # Test against what the lrtest() R function returns -# model1 = 158.0494 -# model0 = 139.806 -# R_chisquare = 36.4868 -# R_p = 1.537e-09 -# -# pred_chi, pred_p = likelihood_ratio(model0, model1, 1)[0] -# -# self.assertTrue(np.round(pred_chi, decimals=4) == R_chisquare) -# pred_p = np.round(pred_p, decimals=12) -# self.assertTrue(pred_p == R_p) -# -# -# def test_variance(self): -# -# # Test that I get back the correct values -# data = [[0,1,2,3,4,45,18,56,24,56], [1,1,1,1,56,78,23,23]] -# expt = [] -# expt.append(np.var(data[0], ddof=1)) -# expt.append(np.var(data[1], ddof=1)) -# resulting_vals = variance(data) -# self.assertTrue(np.array_equal(np.array(expt), -# np.array(resulting_vals))) -# # Using np.var which is optimized and unittested -# -# def test_skew(self): -# -# # Using the scipy.stats definition which is optimized and unittested -# data = [[0,1,2,3,4,45,18,56,24,56], [1,1,1,1,56,78,23,23]] -# expt = [] -# expt.append(stats.skew(data[0])) -# expt.append(stats.skew(data[1])) -# resulting_vals = skew(data) -# self.assertTrue(np.array_equal(np.array(expt), -# np.array(resulting_vals))) -# -# def test_kurtosis(self): -# -# # Using the scipy.stats definition which is optimized and unittested -# data = [[0,1,2,3,4,45,18,56,24,56], [1,1,1,1,56,78,23,23]] -# expt = [] -# expt.append(stats.kurtosis(data[0])) -# expt.append(stats.kurtosis(data[1])) -# resulting_vals = kurtosis(data) -# self.assertTrue(np.array_equal(np.array(expt), -# np.array(resulting_vals))) -# -# def test_mean_square_error(self): -# -# # Test against R mse function -# pred = np.arange(1,9) -# obs = np.arange(7, 15) -# -# comp_val = 36 -# pred = mean_squared_error(pred, obs) -# self.assertEqual(pred, comp_val) -# -# def test_bootstrap_moment(self): -# -# data1 = np.arange(1, 31) -# data2 = np.arange(20, 50) -# # Test the return is empty if wrong keyword is given -# bs_vals = bootstrap_moment(data1, data2, ['men', 'vaiance', -# 'sew', 'kurtoss'], num_samp=100) -# -# self.assertTrue(len(bs_vals) == 0) -# -# # Test bootstrap moment against William Rice's (UCSB) bootstrap -# # programs in Statistics 101. 
Just testing the mean, but the -# # implementation is the same for all of them -# test_ci = np.array([-23.4, -14.6]) -# -# bs_vals = bootstrap_moment(data1, data2, ['mean', 'variance', -# 'skew', 'kurtosis'], num_samp=50000) -# -# # Check that Bill Rice's and our 95% CIs match -# self.assertTrue(np.array_equal(test_ci, np.round(bs_vals['mean'][1], -# decimals=1))) -# -# # Check that the deltas match -# self.assertTrue(-19 == bs_vals["mean"][0]) -# -# # Check that the length is right -# self.assertTrue(len(bs_vals) == 4) -# + # def test_lrt(self): + + # # Test against what the lrtest() R function returns + # model1 = 158.0494 + # model0 = 139.806 + # R_chisquare = 36.4868 + # R_p = 1.537e-09 + + # pred_chi, pred_p = lrt(model1, model0, 1) + + # assert_almost_equal(pred_chi, R_chisquare) + # assert_almost_equal(pred_p, R_p) + + # def test_empirical_cdf(self): + + # #Test against R's ecdf function + + # # Test Case 1 + # test_data = [1, 1, 1, 1, 2, 3, 4, 5, 6, 6] + # R_res = [.4, .4, .4, .4, .5, .6, .7, .8, 1, 1] + # res = empirical_cdf(test_data) + # assert_array_equal(R_res, res) + + # # Test Case 2 + # test_data = [3, 3, 3, 3] + # R_res = [1, 1, 1, 1] + # res = empirical_cdf(test_data) + # assert_array_equal(R_res, res) From 120cedcceed13269d03e3ef4d6acc07e844790aa Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 26 Mar 2014 14:53:43 -0700 Subject: [PATCH 148/343] Fixed misaligned docstring --- macroeco/empirical/empirical.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index f46e37b..aeb692d 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -311,7 +311,7 @@ def sad(patch, cols, splits='', clean=True): Returns ------- {1} Result has two columns: spp (species identifier) and y (individuals of - that species). + that species). Notes ----- @@ -365,7 +365,7 @@ def ssad(patch, cols, splits=''): Returns ------- {1} Result has one column giving the individuals of species in each - subplot. + subplot. Notes ----- From f62a69adbd3fdcf7e01a4d74bb3849a53ffa94c8 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 26 Mar 2014 14:55:33 -0700 Subject: [PATCH 149/343] Realigned comments in compare.py --- macroeco/compare/compare.py | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/macroeco/compare/compare.py b/macroeco/compare/compare.py index 0648597..7abaf6e 100644 --- a/macroeco/compare/compare.py +++ b/macroeco/compare/compare.py @@ -7,14 +7,20 @@ from ..misc import doc_sub -_data_doc = """data : array-like - data from which to caculate the the likelihood""" +_data_doc = \ + """data : array-like + data from which to caculate the the likelihood + """ -_model_doc = """model : scipy distribution object - A frozen scipy model object. Needs to have the attribute *.shape""" +_model_doc = \ + """model : scipy distribution object + A frozen scipy model object. Needs to have the attribute *.shape + """ -_obs_pred_doc = """obs, pred : array-like objects - Observed and predicted data""" +_obs_pred_doc = \ + """obs, pred : array-like objects + Observed and predicted data + """ @doc_sub(_data_doc, _model_doc) @@ -25,7 +31,6 @@ def nll(data, model): Parameters ---------- {0} - {1} Returns @@ -51,7 +56,6 @@ def lrt(data, model_null, model_alt, df=None): Parameters ---------- {0} - model_null : scipy distribution object The null model as a frozen scipy distribution object. Parameters of distribution must be given as keyword arguments. 
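# A minimal sketch of the frozen-model convention that nll and lrt assume
# (keyword-parameterized scipy distributions; the value below matches the
# R-based check sum(dnorm(1:5, log=TRUE)) used in test_compare):
#
#     import numpy as np
#     import scipy.stats as stats
#     data = np.arange(1, 6)
#     model = stats.norm(loc=0, scale=1)   # frozen, keyword-parameterized
#     -np.sum(model.logpdf(data))          # 32.0947, what nll(data, model) returns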
@@ -104,9 +108,7 @@ def AIC(data, model, params=None, corrected=True): Parameters ---------- {0} - {1} - params : int The number of parameters in the model. If None, calculates the number of parameters from the distribution object @@ -187,7 +189,6 @@ def sum_of_squares(obs, pred): Parameters ---------- {0} - Returns ------- float @@ -206,7 +207,6 @@ def r_squared(obs, pred, one_to_one=False, log_trans=True): Parameters ---------- {0} - one_to_one : bool If True, calculates the R^2 based on the one-to-one line as done in [#]_. If False, calculates the standard R^2 from a regression fit. @@ -293,11 +293,3 @@ def bin_data(data, max_num): hist_data = np.histogram(data, bins=boundaries) return hist_data - - -def _to_arrays(*args): - ''' - Converts all args to np.arrays - ''' - return tuple([np.array(ta) if np.iterable(ta) else np.array([ta]) for ta in - args]) From 15997a6afdba26294a925142afddf8fdba344dcc Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 26 Mar 2014 17:00:16 -0700 Subject: [PATCH 150/343] Get rid of mpltools dependency, use own rcparams values --- doc/conf.py | 6 +++--- macroeco/main/main.py | 9 ++++----- macroeco/misc/rcparams.py | 26 ++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 8 deletions(-) create mode 100644 macroeco/misc/rcparams.py diff --git a/doc/conf.py b/doc/conf.py index 2ed2206..76fd7e0 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -26,13 +26,13 @@ # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.mathjax', +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.mathjax', 'sphinx.ext.autosummary', 'numpydoc', 'sphinx.ext.intersphinx'] autosummary_generate = True #autodoc_default_flags = ['inherited-members'] -intersphinx_mapping = {'scipy': ('http://docs.scipy.org/doc/scipy/reference/', +intersphinx_mapping = {'scipy': ('http://docs.scipy.org/doc/scipy/reference/', None)} # Add any paths that contain templates here, relative to this directory. @@ -115,7 +115,7 @@ def __getattr__(cls, name): else: return Mock() -MOCK_MODULES = ['mpltools', 'shapely', 'shapely.geometry'] +MOCK_MODULES = ['shapely', 'shapely.geometry'] for mod_name in MOCK_MODULES: sys.modules[mod_name] = Mock() diff --git a/macroeco/main/main.py b/macroeco/main/main.py index e415b96..7d7d7be 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -11,17 +11,16 @@ import pandas as pd import matplotlib.pyplot as plt -from mpltools import style -style.use('ggplot') -import matplotlib as mpl # Colorblind safe palette -mpl.rcParams['axes.color_cycle'] = ['0072B2','D55E00','CC79A7','009E73', - 'E69F00','F0E442','56B4E9'] +import matplotlib as mpl from ..misc import setup_log from .. import empirical as emp from .. import models as mod from .. 
import compare as comp +from ..misc.rcparams import ggplot_rc +mpl.rcParams.update(ggplot_rc) + def main(param_path='parameters.txt', flat_output=False): """ diff --git a/macroeco/misc/rcparams.py b/macroeco/misc/rcparams.py new file mode 100644 index 0000000..e6f1eea --- /dev/null +++ b/macroeco/misc/rcparams.py @@ -0,0 +1,26 @@ +ggplot_rc = \ +{ +"patch.linewidth" : 0.5, +"patch.facecolor" : '#348ABD', +"patch.edgecolor" : '#EEEEEE', +"patch.antialiased" : True, +"font.size" : 10.0, +"axes.facecolor" : '#E5E5E5', +"axes.edgecolor" : 'white', +"axes.linewidth" : 2, +"axes.grid" : True, +"axes.titlesize" : 'x-large', +"axes.labelsize" : 'large', +"axes.labelcolor" : '#555555', +"axes.axisbelow" : True, +"axes.color_cycle" : ['#0072B2','#D55E00','#CC79A7','#009E73', '#E69F00', + '#F0E442', '#56B4E9'], +"xtick.color" : '#555555', +"xtick.direction" : 'out', +"ytick.color" : '#555555', +"ytick.direction" : 'out', +"grid.color" : 'white', +"grid.linestyle" : '-', +"figure.facecolor" : 'white', +"figure.edgecolor" : '0.50', +} From ae2e2d329f5ecb3508f2d8c571229e3c7371ab72 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 26 Mar 2014 17:01:14 -0700 Subject: [PATCH 151/343] Quick catch in main for empirical results of length 1 --- macroeco/main/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 7d7d7be..fb5b399 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -544,7 +544,10 @@ def _save_table_and_plot(spid, models, options, fit_results, name, df, emp = df_plt['empirical'] df_plt = df_plt.drop('empirical',1) - width = df['x'].values[1] - df['x'].values[0] + try: + width = df['x'].values[1] - df['x'].values[0] + except: + width = 1 ax = df_plt.plot(lw=3) exec plot_exec_str ax = _pad_plot_frame(ax) From 466707b57222d9d487901eda769baf9d90d6b02c Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 26 Mar 2014 17:05:37 -0700 Subject: [PATCH 152/343] Import empirical_cdf in empirical --- macroeco/empirical/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/macroeco/empirical/__init__.py b/macroeco/empirical/__init__.py index 31f6f14..a1991b8 100644 --- a/macroeco/empirical/__init__.py +++ b/macroeco/empirical/__init__.py @@ -3,7 +3,7 @@ Empirical (:mod:`macroeco.empirical`) ===================================== -This module contains functions used in the empirical analysis of +This module contains functions used in the empirical analysis of macroecological patterns. 
Patch @@ -28,4 +28,5 @@ """ from .empirical import (Patch, - sad, ssad) + sad, ssad, + empirical_cdf) From 15cc07244b903a995c221f6f97c551ce3bf6ac59 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 26 Mar 2014 17:13:33 -0700 Subject: [PATCH 153/343] Move desktop outside of package to parent dir --- macroeco/desktop.py => desktop.py | 10 +++++----- macroeco/desktop_mac.spec => desktop_mac.spec | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) rename macroeco/desktop.py => desktop.py (94%) rename macroeco/desktop_mac.spec => desktop_mac.spec (92%) diff --git a/macroeco/desktop.py b/desktop.py similarity index 94% rename from macroeco/desktop.py rename to desktop.py index 4642c98..53aced3 100755 --- a/macroeco/desktop.py +++ b/desktop.py @@ -15,7 +15,7 @@ import os, sys import threading as thread -import main +from macroeco.main import main class RedirectText(object): def __init__(self,aWxTextCtrl): @@ -123,7 +123,7 @@ def OnRun(self,e): def RunMain(self): self.run_button.Enable(False) # Turn the run button off - self.t = thread.Thread(target=main.main, args=(self.parampath,)) + self.t = thread.Thread(target=main, args=(self.parampath,)) self.t.daemon = True # Kills thread if app exits self.t.start() @@ -133,11 +133,11 @@ def OnIdle(self, event): self.run_button.Enable(True) # Turn the run button on if __name__ == '__main__': - # To execute, run `pythonw -m macroeco.desktop path/to/parameters.txt`. - # With arg, execute main.main(arg), without arg open GUI window + # To execute, run `pythonw -m desktop path/to/parameters.txt` + # With arg, execute main(arg), without arg open GUI window if len(sys.argv) > 1: param_path = sys.argv[1] - main.main(param_path) + main(param_path) else: app = wx.App(False) frame = MainWindow(None, 'Macroeco Desktop') diff --git a/macroeco/desktop_mac.spec b/desktop_mac.spec similarity index 92% rename from macroeco/desktop_mac.spec rename to desktop_mac.spec index 3955cbc..d5a02f9 100644 --- a/macroeco/desktop_mac.spec +++ b/desktop_mac.spec @@ -1,6 +1,6 @@ # -*- mode: python -*- a = Analysis(['desktop.py'], - pathex=['/Users/jkitzes/Projects/macroeco/macroeco'], + pathex=['/Users/jkitzes/Projects/macroeco'], hiddenimports=['scipy.special._ufuncs_cxx'], hookspath=None, runtime_hooks=None) From fb2ed4862c84f3b3f778db216ed985a980cfe339 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 27 Mar 2014 11:12:30 -0700 Subject: [PATCH 154/343] Allow args and kwargs in AIC and lrt --- macroeco/compare/compare.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/macroeco/compare/compare.py b/macroeco/compare/compare.py index 7abaf6e..36932ee 100644 --- a/macroeco/compare/compare.py +++ b/macroeco/compare/compare.py @@ -13,8 +13,9 @@ """ _model_doc = \ - """model : scipy distribution object - A frozen scipy model object. Needs to have the attribute *.shape + """model : frozen distribution object A frozen scipy model object. When + freezing, keyword args ``loc`` and ``scale`` should only be included if + they represent a distribution parameter. 
""" _obs_pred_doc = \ @@ -73,7 +74,7 @@ def lrt(data, model_null, model_alt, df=None): Returns ------- tuple - (G^2 statistic, p-value) + G^2 statistic, p-value Notes ----- @@ -95,9 +96,10 @@ def lrt(data, model_null, model_alt, df=None): # Set df if necessary if not df: - df = len(model_alt.kwds) - len(model_null.kwds) + df = ( len(model_alt.args) + len(model_alt.kwds) + - len(model_null.args) - len(model_null.kwds) ) - return (test_stat, stats.chisqprob(test_stat, df)) + return test_stat, stats.chisqprob(test_stat, df) @doc_sub(_data_doc, _model_doc) @@ -138,7 +140,7 @@ def AIC(data, model, params=None, corrected=True): L = nll(data, model) if not params: - k = len(model.kwds) + k = len(model.kwds) + len(model.args) else: k = params From 6e2bca564f282d5776e952f3ecf96ba824768fa5 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 26 Mar 2014 17:58:13 -0700 Subject: [PATCH 155/343] Added cnbinom to init --- macroeco/models/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py index 08ba9fa..0df04b6 100644 --- a/macroeco/models/__init__.py +++ b/macroeco/models/__init__.py @@ -36,5 +36,5 @@ they are not common in quantitative ecology. """ -from _distributions import (geom, geom_uptrunc, nbinom, +from _distributions import (geom, geom_uptrunc, nbinom, cnbinom, expon, expon_uptrunc) From d20b7004cc8d4544bf482ac37000918d916bba10 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 26 Mar 2014 17:58:59 -0700 Subject: [PATCH 156/343] Added beginning of cnbinom to distributions --- macroeco/models/_distributions.py | 48 +++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 5f03d1e..01d3836 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -467,6 +467,54 @@ def nll(data, mu, k): return k_array[min_nll_idx] +class cnbinom_gen(rv_discrete_meco): + r""" + The conditional (finite) negative binomial distribution described by + Zillio and He (2010) and Conlisk et al. (2007) + + MORE + """ + # TODO: Set b (upper bound). Is this the same as an upper truncated NBD? + + @inherit_docstring_from(rv_discrete_meco) + def translate_args(self, mu, k_agg, b): + return mu, k_agg, b + + @inherit_docstring_from(rv_discrete_meco) + def fit_mle(self, data, b=None, k_range=(0.1, 100, 0.1)): + pass + # mu = np.mean(data) + + # if not b: + # b = np.sum(data) + + # return mu, _solve_k_from_mu(data, cnbinom, k_range, mu=mu, b=b) + + def _pmf(self, x, mu, k_agg, b): + return np.exp(self._logpmf(x, mu, k_agg, b)) + + def _logpmf(self, x, mu, k_agg, b): + ln_l = lambda n_i, n, a, k_agg: _ln_choose(n_i + k_agg - 1, n_i) + \ + _ln_choose(n - n_i + (k_agg / a) - k_agg - 1, n - n_i) -\ + _ln_choose(n + (k_agg / a) - 1, n) + a = mu / b + return ln_l(x, b, a, k_agg) + + def _stats(self, mu, k_agg, b): + pass + +cnbinom = cnbinom_gen(name="cnbinom", shapes="mu, k_agg, b") + + +def _ln_choose(n, k_agg): + ''' + log binomial coefficient with extended gamma factorials. n and k_agg may be int + or array - if both array, must be the same length. 
+ ''' + gammaln = special.gammaln + return gammaln(n + 1) - (gammaln(k_agg + 1) + gammaln(n - k_agg + 1)) + + # # Continuous # From b25a2cd1e2de2ea6c3635f1fe22bde9ada74a9a1 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 26 Mar 2014 17:59:49 -0700 Subject: [PATCH 157/343] Changed k to k_agg to avoid conflict with native scipy variable --- macroeco/models/_distributions.py | 108 ++++++++++++++++-------------- 1 file changed, 56 insertions(+), 52 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 01d3836..854394d 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -348,12 +348,11 @@ def p_eq(x, mu, b): x, mu, b = Decimal(x), Decimal(mu), Decimal(b) return ( (x / (1 - x)) - ((b + 1) / (x**-b - 1)) - mu ) - # x here is the param raised to the k power, or 1 - p + # x here is the param raised to the k_agg power, or 1 - p return 1 - optim.brentq(p_eq, 1e-9, 20, args=(mu, b), disp=True) _geom_solve_p_from_mu_vect = np.vectorize(_geom_solve_p_from_mu) - class nbinom_gen(spdist.nbinom_gen): r""" A negative binomial discrete random variable. @@ -364,98 +363,103 @@ class nbinom_gen(spdist.nbinom_gen): .. math:: - P(x) = - \frac{\Gamma (k + x)}{\Gamma(k) x!} \left(\frac{k}{k+\mu}\right)^k - \left(\frac{\mu}{k+\mu}\right)^x + p(x) = \frac{\gamma (k_agg + x)}{\gamma(k_agg) x!} + \left(\frac{k_agg}{k_agg+\mu}\right)^k_agg + \left(\frac{\mu}{k_agg+\mu}\right)^x - for ``x >= 0``. In the traditional parameterization, ``n = k`` (the size - parameter) and ``p = k / (k + mu)``. The ``loc`` parameter is not used. + for ``x >= 0``. in the traditional parameterization, ``n = k_agg`` (the + size parameter) and ``p = k_agg / (k_agg + mu)``. the ``loc`` parameter is + not used. Methods ------- - translate_args(mu, k) - Not used, returns mu and k. + translate_args(mu, k_agg) + not used, returns mu and k_agg. fit_mle(data, k_range=(0.1,100,0.1)) - ML estimate of shape parameters mu and k given data, with k evaluated - at (min, max, step) values given by k_range. + ml estimate of shape parameters mu and k_agg given data, with k_agg + evaluated at (min, max, step) values given by k_range. %(before_notes)s mu : float distribution mean - k : float + k_agg : float clustering parameter """ @inherit_docstring_from(rv_discrete_meco) - def translate_args(self, mu, k): - return mu, k + def translate_args(self, mu, k_agg): + return mu, k_agg @inherit_docstring_from(rv_discrete_meco) def fit_mle(self, data, k_range=(0.1,100,0.1)): """%(super)s - In addition to data, gives an optional keyword argument - k_range contains a tuple of the start, stop, and step values to search - for k. Default is ``k_range=(0.1,100,0.1)``. A brute force search is - then used to find the parameter k. + + In addition to data, gives an optional keyword argument k_range + contains a tuple of the start, stop, and step values to search for + k_agg. default is ``k_range=(0.1,100,0.1)``. a brute force search is + then used to find the parameter k_agg. 
""" - # TODO: Check and mention in docstring biases of MLE for k + # todo: check and mention in docstring biases of mle for k_agg mu = np.mean(data) return mu, _nbinom_solve_k_from_mu(data, mu, k_range) - def _get_p_from_mu(self, mu, k): - return k / (k + mu) + def _get_p_from_mu(self, mu, k_agg): + return k_agg / (k_agg + mu) + + def _rvs(self, mu, k_agg): + p = self._get_p_from_mu(mu, k_agg) + return nprand.negative_binomial(k_agg, p, self._size) + + def _argcheck(self, mu, k_agg): + p = self._get_p_from_mu(mu, k_agg) + return (k_agg >= 0) & (p >= 0) & (p <= 1) - def _rvs(self, mu, k): - p = self._get_p_from_mu(mu, k) - return nprand.negative_binomial(k, p, self._size) + def _pmf(self, x, mu, k_agg): + p = self._get_p_from_mu(mu, k_agg) + return np.exp(self._logpmf(x, mu, k_agg)) - def _argcheck(self, mu, k): - p = self._get_p_from_mu(mu, k) - return (k >= 0) & (p >= 0) & (p <= 1) + def _logpmf(self, x, mu, k_agg): + p = self._get_p_from_mu(mu, k_agg) - def _pmf(self, x, mu, k): - p = self._get_p_from_mu(mu, k) - return np.exp(self._logpmf(x, mu, k)) + coeff =\ + special.gammaln(k_agg+x)-special.gammaln(x+1)-special.gammaln(k_agg) - def _logpmf(self, x, mu, k): - p = self._get_p_from_mu(mu, k) - coeff = special.gammaln(k+x)-special.gammaln(x+1)-special.gammaln(k) - return coeff + k*np.log(p) + x*np.log(1-p) + return coeff + k_agg*np.log(p) + x*np.log(1-p) - def _cdf(self, x, mu, k): - p = self._get_p_from_mu(mu, k) + def _cdf(self, x, mu, k_agg): + p = self._get_p_from_mu(mu, k_agg) x = np.floor(x) - return special.betainc(k, x+1, p) + return special.betainc(k_agg, x+1, p) - def _ppf(self, q, mu, k): - p = self._get_p_from_mu(mu, k) - vals = np.ceil(special.nbdtrik(q, k, p)) + def _ppf(self, q, mu, k_agg): + p = self._get_p_from_mu(mu, k_agg) + vals = np.ceil(special.nbdtrik(q, k_agg, p)) vals1 = (vals-1).clip(0.0, np.inf) - temp = self._cdf(vals1, k, p) + temp = self._cdf(vals1, k_agg, p) return np.where(temp >= q, vals1, vals) - def _stats(self, mu, k): - p = self._get_p_from_mu(mu, k) + def _stats(self, mu, k_agg): + p = self._get_p_from_mu(mu, k_agg) Q = 1.0 / p - P = Q - 1.0 - mu = k*P - var = k*P*Q - g1 = (Q+P)/np.sqrt(k*P*Q) - g2 = (1.0 + 6*P*Q) / (k*P*Q) + p = q - 1.0 + mu = k_agg*p + var = k_agg*p*q + g1 = (q+p)/np.sqrt(k_agg*p*q) + g2 = (1.0 + 6*p*q) / (k_agg*p*q) return mu, var, g1, g2 -nbinom = nbinom_gen(name='nbinom', shapes='mu, k') +nbinom = nbinom_gen(name='nbinom', shapes='mu, k_agg') def _nbinom_solve_k_from_mu(data, mu, k_range): """ - For the nbinom, given mu, return k from searching some k_range. + For the nbinom, given mu, return k_agg from searching some k_range. """ # TODO: See if a root finder like fminbound would work with Decimal used in # logpmf method (will this work with arrays?) 
- def nll(data, mu, k): - return -np.sum(nbinom._logpmf(data, mu, k)) + def nll(data, mu, k_agg): + return -np.sum(nbinom._logpmf(data, mu, k_agg)) k_array = np.arange(*k_range) nll_array = np.zeros(len(k_array)) From 221cd865230371d35f2c8d517a4efe473c55eda4 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 26 Mar 2014 18:16:36 -0700 Subject: [PATCH 158/343] Generalized brute force solver for cnbinom --- macroeco/models/_distributions.py | 71 ++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 854394d..bb51bad 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -402,7 +402,7 @@ def fit_mle(self, data, k_range=(0.1,100,0.1)): """ # todo: check and mention in docstring biases of mle for k_agg mu = np.mean(data) - return mu, _nbinom_solve_k_from_mu(data, mu, k_range) + return mu, _solve_k_from_mu(data, k_range, nbinom_nll, mu) def _get_p_from_mu(self, mu, k_agg): return k_agg / (k_agg + mu) @@ -451,25 +451,10 @@ def _stats(self, mu, k_agg): nbinom = nbinom_gen(name='nbinom', shapes='mu, k_agg') -def _nbinom_solve_k_from_mu(data, mu, k_range): - """ - For the nbinom, given mu, return k_agg from searching some k_range. - """ - # TODO: See if a root finder like fminbound would work with Decimal used in - # logpmf method (will this work with arrays?) - - def nll(data, mu, k_agg): - return -np.sum(nbinom._logpmf(data, mu, k_agg)) - k_array = np.arange(*k_range) - nll_array = np.zeros(len(k_array)) +def nbinom_nll(data, k_agg, mu): + return -np.sum(nbinom._logpmf(data, mu, k_agg)) - for i in range(len(k_array)): - nll_array[i] = nll(data, mu, k_array[i]) - - min_nll_idx = np.argmin(nll_array) - - return k_array[min_nll_idx] class cnbinom_gen(rv_discrete_meco): r""" @@ -486,13 +471,12 @@ def translate_args(self, mu, k_agg, b): @inherit_docstring_from(rv_discrete_meco) def fit_mle(self, data, b=None, k_range=(0.1, 100, 0.1)): - pass - # mu = np.mean(data) + mu = np.mean(data) - # if not b: - # b = np.sum(data) + if not b: + b = np.sum(data) - # return mu, _solve_k_from_mu(data, cnbinom, k_range, mu=mu, b=b) + return mu, _solve_k_from_mu(data, k_range, cnbinom_nll, mu, b) def _pmf(self, x, mu, k_agg, b): return np.exp(self._logpmf(x, mu, k_agg, b)) @@ -510,10 +494,15 @@ def _stats(self, mu, k_agg, b): cnbinom = cnbinom_gen(name="cnbinom", shapes="mu, k_agg, b") +def cnbinom_nll(data, k_agg, mu, b): + return -np.sum(cnbinom._logpmf(data, mu, k_agg, b)) + def _ln_choose(n, k_agg): ''' - log binomial coefficient with extended gamma factorials. n and k_agg may be int - or array - if both array, must be the same length. + + log binomial coefficient with extended gamma factorials. n and k_agg may be + int or array - if both array, must be the same length. + ''' gammaln = special.gammaln return gammaln(n + 1) - (gammaln(k_agg + 1) + gammaln(n - k_agg + 1)) @@ -637,3 +626,35 @@ def _stats(self, lam, b): return expon.stats(lam) expon_uptrunc = expon_uptrunc_gen(a=0.0, name='expon_uptrunc', shapes='lam, b') + + +def _solve_k_from_mu(data, k_range, nll, *args): + """ + For given args, return k_agg from searching some k_range. + + Parameters + ---------- + data : array + k_range : array + nll : function + + args : + + Returns + -------- + :float + Minimum k_agg + + """ + # TODO: See if a root finder like fminbound would work with Decimal used in + # logpmf method (will this work with arrays?) 
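+    # Brute-force grid search: evaluate the supplied nll function at each
+    # candidate k_agg in np.arange(*k_range) and return the candidate giving
+    # the smallest negative log-likelihood.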
+ + k_array = np.arange(*k_range) + nll_array = np.zeros(len(k_array)) + + for i in range(len(k_array)): + nll_array[i] = nll(data, k_array[i], *args) + + min_nll_idx = np.argmin(nll_array) + + return k_array[min_nll_idx] \ No newline at end of file From 344b178936ee2125c54d400b7ee15f834e8c47f4 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 27 Mar 2014 12:06:39 -0700 Subject: [PATCH 159/343] Clarify split vs subset language in main --- macroeco/main/main.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index fb5b399..857576f 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -294,7 +294,7 @@ def _fit_models(options, core_results): Returns ------- list of dicts - Each element in list corresponds to a split. The dict has a key for + Each element in list corresponds to a subset. The dict has a key for each model given in options, and the value is a list of fitted parameters (tuple), values (array), comparison statistic names (list), and comparison statistic values (list). @@ -312,7 +312,7 @@ def _fit_models(options, core_results): # TODO: Make work for 2D results, i.e., curves, comm_sep, o_ring # TODO: Make work for curves in general (check if 'x' present in core_res) fit_results = [] - for core_result in core_results: # Each split + for core_result in core_results: # Each subset fit_result = {} for model in models: data = core_result[1]['y'].values @@ -342,7 +342,7 @@ def _get_values(data, model, fits): return values def _get_comparison_statistic(data, fits): - return ['AIC'], [comp.AIC(data, fits)] + return ['AIC'], [0] def _save_results(options, module, core_results, fit_results): @@ -372,7 +372,7 @@ def _save_results(options, module, core_results, fit_results): # Write additional results if analysis from emp if module == 'emp': - _write_split_index_file(options, core_results) + _write_subset_index_file(options, core_results) if fit_results: # If models given for i, core_result in enumerate(core_results): @@ -418,15 +418,15 @@ def _get_file_path(spid, options, file_name): '%i_%s' % (spid+1, file_name)) -def _write_split_index_file(options, core_results): +def _write_subset_index_file(options, core_results): """ - Write table giving index of splits, giving number and combination + Write table giving index of subsets, giving number and subset string """ - f_path = os.path.join(options['run_dir'], '_split_index.csv') - split_strs = zip(*core_results)[0] - index = np.arange(len(split_strs)) + 1 - df = pd.DataFrame({'splits': split_strs}, index=index) + f_path = os.path.join(options['run_dir'], '_subset_index.csv') + subset_strs = zip(*core_results)[0] + index = np.arange(len(subset_strs)) + 1 + df = pd.DataFrame({'subsets': subset_strs}, index=index) df.to_csv(f_path) From 74f634e0a6f84cb4ca0e4746cb42446945b7f658 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 27 Mar 2014 13:05:34 -0700 Subject: [PATCH 160/343] Added cnbinom to init --- macroeco/models/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py index 0df04b6..0796004 100644 --- a/macroeco/models/__init__.py +++ b/macroeco/models/__init__.py @@ -30,6 +30,7 @@ geom geom_uptrunc nbinom + cnbinom .. 
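# Illustrative usage of the exported distributions (a sketch only: the
# parameter values below are made up, and this assumes macroeco.models is
# importable with the changes above applied).
import numpy as np
from scipy import stats
from macroeco.models import nbinom, cnbinom

x = np.arange(0, 11)
mu, k_agg, b = 4.0, 1.0, 10.0

pmf_nb = nbinom.pmf(x, mu, k_agg)        # unbounded negative binomial
pmf_cnb = cnbinom.pmf(x, mu, k_agg, b)   # conditional on the upper bound b

# nbinom is scipy's negative binomial reparameterized with
# p = k_agg / (k_agg + mu), so the two pmfs should agree
p = k_agg / (k_agg + mu)
print(np.allclose(pmf_nb, stats.nbinom.pmf(x, k_agg, p)))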
DV: Our public-facing distributions do not use location and scale parameters, as From 70996ab4c7c9bc3b700f61f1b87cc1570695a1bb Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 27 Mar 2014 13:06:33 -0700 Subject: [PATCH 161/343] Updated docstring and stats method of cnbinom --- macroeco/models/_distributions.py | 72 ++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index bb51bad..0418bc8 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -363,9 +363,8 @@ class nbinom_gen(spdist.nbinom_gen): .. math:: - p(x) = \frac{\gamma (k_agg + x)}{\gamma(k_agg) x!} - \left(\frac{k_agg}{k_agg+\mu}\right)^k_agg - \left(\frac{\mu}{k_agg+\mu}\right)^x + p(x) = \frac{\gamma (k + x)}{\gamma(k) x!} + \left(\frac{k}{k+\mu}\right)^k \left(\frac{\mu}{k+\mu}\right)^x for ``x >= 0``. in the traditional parameterization, ``n = k_agg`` (the size parameter) and ``p = k_agg / (k_agg + mu)``. the ``loc`` parameter is @@ -391,7 +390,7 @@ def translate_args(self, mu, k_agg): return mu, k_agg @inherit_docstring_from(rv_discrete_meco) - def fit_mle(self, data, k_range=(0.1,100,0.1)): + def fit_mle(self, data, k_range=(0.1, 100, 0.1)): """%(super)s In addition to data, gives an optional keyword argument k_range @@ -458,12 +457,44 @@ def nbinom_nll(data, k_agg, mu): class cnbinom_gen(rv_discrete_meco): r""" - The conditional (finite) negative binomial distribution described by - Zillio and He (2010) and Conlisk et al. (2007) + The conditional negative binomial random variable - MORE + This distribution was described by Zillio and He (2010) [#]_ and Conlisk + et al. (2007) [#]_ + + .. math:: + + p(x) = \frac{\binom{x + k - 1}{x} \binom{b - x + k/a - k -1}{b + -x}}{\binom{b + k/a - 1}{b}} + + for ``x >= 0``. In this parameterization ``a = E[p(x)] / b`` where ``b`` is + the upper limit of the distribution. + + Methods + ------- + translate_args(mu, k_agg) + not used, returns mu, k_agg, and b. + fit_mle(data, k_range=(0.1,100,0.1)) + ml estimate of shape parameters mu and k_agg given data, with k_agg + evaluated at (min, max, step) values given by k_range. + %(before_notes)s + mu : float + distribution mean + k_agg : float + clustering parameter (refered to as ``k`` above) + b : float + Upper bound of distribution + + References + ---------- + .. [#] + Zillio, T. & He, F. (2010). Modeling spatial aggregation of finite + populations. Ecology, 91(12), 3698-3706 + .. [#] + Conlisk, E., Bloxham, M., Conlisk, J, Enquist, E., and Harte, J. + (2007). A new class of models of spatial distribution. Ecological + Monographs, 77(2), 269-284 """ - # TODO: Set b (upper bound). Is this the same as an upper truncated NBD? 
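# A minimal numerical check of the pmf above (a sketch assuming only numpy and
# scipy; the parameter values are arbitrary): written out via log-binomial
# coefficients, the probabilities over the full support 0..b should sum to ~1
# and the mean should come out close to mu.
import numpy as np
from scipy import special

def ln_choose(n, k):
    # log binomial coefficient via gammaln, as in _ln_choose below
    return special.gammaln(n + 1) - (special.gammaln(k + 1) +
                                     special.gammaln(n - k + 1))

def cnbinom_logpmf(x, mu, k_agg, b):
    a = mu / b
    return (ln_choose(x + k_agg - 1, x)
            + ln_choose(b - x + (k_agg / a) - k_agg - 1, b - x)
            - ln_choose(b + (k_agg / a) - 1, b))

x = np.arange(0, 101)
pmf = np.exp(cnbinom_logpmf(x, mu=10.0, k_agg=1.0, b=100.0))
print(pmf.sum())        # ~1.0, i.e. a proper pmf on 0..b
print((x * pmf).sum())  # ~10.0, i.e. the mean mu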
@inherit_docstring_from(rv_discrete_meco) def translate_args(self, mu, k_agg, b): @@ -476,30 +507,39 @@ def fit_mle(self, data, b=None, k_range=(0.1, 100, 0.1)): if not b: b = np.sum(data) - return mu, _solve_k_from_mu(data, k_range, cnbinom_nll, mu, b) + return mu, _solve_k_from_mu(data, k_range, _cnbinom_nll, mu, b) def _pmf(self, x, mu, k_agg, b): return np.exp(self._logpmf(x, mu, k_agg, b)) def _logpmf(self, x, mu, k_agg, b): - ln_l = lambda n_i, n, a, k_agg: _ln_choose(n_i + k_agg - 1, n_i) + \ - _ln_choose(n - n_i + (k_agg / a) - k_agg - 1, n - n_i) -\ - _ln_choose(n + (k_agg / a) - 1, n) a = mu / b - return ln_l(x, b, a, k_agg) + logpmf = _cnbinom_logpmf(x, b, a, k_agg) + logpmf[x > b] = -np.inf + return logpmf def _stats(self, mu, k_agg, b): - pass + mu = mu + var = ((1 - mu / b) * mu * (k_agg + mu)) / (k_agg + (mu / b)) + return mu, var, None, None cnbinom = cnbinom_gen(name="cnbinom", shapes="mu, k_agg, b") -def cnbinom_nll(data, k_agg, mu, b): +def _cnbinom_logpmf(n_i, n, a, k_agg): + # Logpmf for cnbinom + return _ln_choose(n_i + k_agg - 1, n_i) + \ + _ln_choose(n - n_i + (k_agg / a) - k_agg - 1, n - n_i) -\ + _ln_choose(n + (k_agg / a) - 1, n) + + +def _cnbinom_nll(data, k_agg, mu, b): + # Negative log likelihood for cnbinom return -np.sum(cnbinom._logpmf(data, mu, k_agg, b)) + def _ln_choose(n, k_agg): ''' - log binomial coefficient with extended gamma factorials. n and k_agg may be int or array - if both array, must be the same length. From 90a12aa2db12dc8ed5d64d6f11211ff160fb118b Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 27 Mar 2014 13:06:52 -0700 Subject: [PATCH 162/343] Added visual test of cnbinom distribution --- macroeco/models/test_distributions.py | 42 +++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index 627dd0d..5eb27de 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -11,9 +11,9 @@ import numpy as np from decimal import Decimal -import macroeco.distributions2 as dist2 -from macroeco.distributions2 import * +from macroeco.models import * import matplotlib.pyplot as plt +import scipy.stats as stats class TestGeom(TestCase): @@ -145,6 +145,44 @@ def test_fit_mle_with_manual_calc(self): mu, k = nbinom.fit_mle(x, k_range=(0.01,10,0.01)) assert_array_almost_equal([mu, k], [9, 8.54], decimal=2) +class TestCnbinom(TestCase): + pass + + # def test_zillio_plots(self): + # """ Test the cnbinom function replicated the Zillio and He plots """ + + # # Define Preliminary + # a = np.array([0.1, .3, .8]) + # k = np.array([.1, 1, 10]) + # fnbd_vec = [] + # nbd_vec = [] + # binm_vec = [] + # descrip = [] + + # # Get data + # for ta in a: + # for tk in k: + # fnbd_vec.append(cnbinom.pmf(np.arange(1,101), ta*100, tk, 100)) + # nbd_vec.append(nbinom.pmf(np.arange(1,101), ta*100, tk)) + # binm_vec.append(stats.binom.pmf(np.arange(1,101), 100, ta)) + + # descrip.append("a=%s, k=%s" % (ta, tk)) + + # # Loop through the data and plot it. 
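    # # Each pass below overlays, for one (a, k) combination, the finite
    # # negative binomial (cnbinom with upper bound b = 100), the unbounded
    # # negative binomial, and the binomial pmf over abundances 1-100, for
    # # visual comparison against the corresponding panels in Zillio and
    # # He (2010).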
+ # for i in xrange(len(fnbd_vec)): + # plt.clf() + # plt.plot(np.arange(1,101), fnbd_vec[i]) + # plt.plot(np.arange(1,101), nbd_vec[i], '--') + # plt.plot(np.arange(1,101), binm_vec[i], '.-') + # plt.legend(('fnbd', 'nbd', 'binm'), loc='best') + # plt.xlabel('abundance') + # plt.ylabel('P(x)') + # plt.ylim((0, .12)) + # plt.text(plt.xlim()[1] * 0.6, plt.ylim()[1] * 0.8, descrip[i]) + # plt.show() + # plt.clf() + + class TestExpon(TestCase): pass From a440e637877e15799b872027a3897a6ea1ed7f38 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 27 Mar 2014 13:57:19 -0700 Subject: [PATCH 163/343] Change cols to string format --- macroeco/empirical/empirical.py | 36 ++++++++++++++++++------- macroeco/main/main.py | 47 ++++++--------------------------- 2 files changed, 34 insertions(+), 49 deletions(-) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index aeb692d..517a2c6 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -1170,18 +1170,34 @@ def z(doubleS, halfS): -def _get_cols(special_cols_names, cols, patch): +def _get_cols(special_col_names, cols, patch): """ - Retrieve values of special_cols from cols dict or Patch metadata + Retrieve values of special_cols from cols string or patch metadata """ - special_cols_values = [] - for col in special_cols_names: - col_value = cols.get(col, None) - if col_value is None: - col_value = patch.meta['Description'].get(col, None) - special_cols_values.append(col_value) - - return tuple(special_cols_values) + + # If cols not given, try to fall back on cols from metadata + if not cols: + if 'cols' in patch.meta['Description'].keys(): + cols = patch.meta['Description']['cols'] + else: + raise NameError, ("cols argument not given, spp_col at a minimum " + "must be specified") + + # Parse cols string into dict + cols = cols.replace(' ', '') + col_list = cols.split(';') + col_dict = {x.split(':')[0]: x.split(':')[1] for x in col_list} + + # Check for spp_col + if 'spp_col' not in col_dict.keys(): + raise NameError, ("spp_col not specified") + + # Get special_col_names from dict + result = [] + for special_col_name in special_col_names: + result.append(col_dict.get(special_col_name, None)) + + return tuple(result) @doc_sub(splits_note) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 857576f..1112f58 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -205,55 +205,24 @@ def _emp_extra_options(options): Get special options patch, cols, and splits if analysis in emp module """ + # Check that metadata is valid metadata_path = os.path.normpath(os.path.join(options['param_dir'], options['metadata'])) if not os.path.isfile(metadata_path): raise IOError, ("Path to metadata file %s is invalid." % metadata_path) + # Using subset if given, create and store patch subset = options.get('subset', '') - options['patch'] = emp.Patch(metadata_path, subset) - options['cols'], options['splits'] = _get_cols_splits(options) - - return options + # If cols or splits not given in options, make empty strings + if 'cols' not in options.keys(): + options['cols'] = '' + if 'splits' not in options.keys(): + options['splits'] = '' -def _get_cols_splits(options): - """ - Notes - ----- - Always returns strings, even if dictionary or list is constructed here, to - ensure consistency with provided options. 
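# Parsing step used by _get_cols above, shown in isolation on a hypothetical
# cols string (illustrative only):
#
#     cols = "spp_col:spp; count_col:count; energy_col:energy"
#     col_list = cols.replace(' ', '').split(';')
#     col_dict = {x.split(':')[0]: x.split(':')[1] for x in col_list}
#     # -> {'spp_col': 'spp', 'count_col': 'count', 'energy_col': 'energy'}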
- - """ - - cols = {} - special_cols = ['spp_col', 'count_col', 'energy_col', 'mass_col'] - - # Cols may be given as option or individual col options may be options - if 'cols' in options.keys(): - cols = eval(options['cols']) # Must be string representing dict - else: - for col in special_cols: - cols[col] = options.get(col, None) - - # If col is still None, try to fall back to metadata - for col in special_cols: - if cols[col] is None: - cols[col] = options['patch'].meta['Description'].get(col, None) - - # Splits may be given as option, else is set to None - if 'splits' in options.keys(): - splits = options['splits'] - else: - splits = None - - # Every metric requires a spp_col - if 'spp_col' not in cols.keys(): - raise ValueError, 'spp_col not specified' - - return str(cols), str(splits) + return options def _arg_kwarg_lists(options, module): From e494f309c49d5ed0e313d627e09c855790e712f2 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 27 Mar 2014 13:59:26 -0700 Subject: [PATCH 164/343] Add 1d check to geom_uptrunc cdf --- macroeco/models/_distributions.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 5f03d1e..74b93d3 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -320,7 +320,10 @@ def _argcheck(self, p, b): def _pmf(self, x, p, b): pmf = (1.0-p)**x * p / (1.0-(1.0-p)**(b+1)) - pmf[x > b] = 0 + if len(np.atleast_1d(x)) > 1: + pmf[x > b] = 0 + elif x > b: + pmf = 0 return pmf def _cdf(self, x, p, b): From 98d77ce73f2c75f7750df209b75be0aa5fb65fad Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 27 Mar 2014 14:02:06 -0700 Subject: [PATCH 165/343] First draft of _curves --- macroeco/models/__init__.py | 4 +- macroeco/models/_curves.py | 210 ++++++++++++++++++++++++++++++++++++ 2 files changed, 213 insertions(+), 1 deletion(-) create mode 100644 macroeco/models/_curves.py diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py index 08ba9fa..ed956cb 100644 --- a/macroeco/models/__init__.py +++ b/macroeco/models/__init__.py @@ -36,5 +36,7 @@ they are not common in quantitative ecology. """ -from _distributions import (geom, geom_uptrunc, nbinom, +from ._distributions import (geom, geom_uptrunc, nbinom, expon, expon_uptrunc) + +from ._curves import (power_law, mete_sar) \ No newline at end of file diff --git a/macroeco/models/_curves.py b/macroeco/models/_curves.py new file mode 100644 index 0000000..8b815b0 --- /dev/null +++ b/macroeco/models/_curves.py @@ -0,0 +1,210 @@ +from __future__ import division + +import numpy as np +import pandas as pd +from scipy import optimize + +from ..misc import inherit_docstring_from + +_doc_methods = \ +"""Methods + ------- + vals(x, parameters) + Dependent variable y given independent variable x and curve parameters + fit_lsq(x, y_obs, params_start=None) + Least squares fit of parameters given data""" + +_doc_parameters = \ +"""Parameters + ---------- + x : iterable + Independent variable + y_obs : iterable + Dependent variable (values observed at x) + params_start : iterable + Optional start values for all parameters. 
Default 1.""" + + +class curve(object): + """ + Generic function class meant for subclassing + """ + + def __init__(self, name=None, parameters=None): + """ + Distribution parameters may be given here or to individual methods + + """ + self.name = name + self.parameters = parameters + self.n_parameters = len(parameters.split(',')) + + def __call__(self, *args, **kwargs): + raise ValueError, "Choose either the vals or fit_lsq methods" + + def vals(self, x, *args, **kwargs): + """ + [Docstring] + + """ + self.vals_kwargs = kwargs + x = np.array(x) + y = self._vals(x, *args) + return pd.DataFrame({'x': x, 'y': y}) + + def _vals(self, x, *args): + """ + Return y given x and parameters + """ + raise NotImplementedError, ("vals not implemented for %s" % self.name) + + def fit_lsq(self, x, y_obs, params_start=None): + """ + Fit curve by method of least squares. + + Parameters + ---------- + x : iterable + Independent variable + y_obs : iterable + Dependent variable (values observed at x) + params_start : iterable + Optional start values for all parameters. Default 1. + + Returns + ------- + array + Best fit values of parameters + + Notes + ----- + If least squares fit does not converge, ValueError is raised with + convergence message. + + """ + + # Set up variables + x = np.array(x) + y_obs = np.array(y_obs) + if not params_start: + params_start = np.ones(len(self.parameters)) + + # Error checking + if len(x) != len(y): + raise ValueError, "x and y_obs must be the same length" + if len(params) != self.n_parameters: + raise ValueError, "Incorrect number of values in params_start" + + # Calculate fit + def residuals(params, x, y_obs): + y_pred = self.vals(x, *params)['y'] + return y_obs - y_pred + + params_fit, _, _, msg, ier = optimize.leastsq(residuals, params_start, + args=(x, y_obs), full_output=True) + + # Check for convergence + if ier > 4: + raise ValueError, ("Least squares fit did not converge with " + "message %s" % msg) + + return params_fit + + +class power_law_gen(curve): + """ + A power-law function + + .. math:: + + y = c x^z + + or equivalently + + .. math:: + + \log(y) = \log(c) + z \log(x) + + Stemming from the log form, ``c`` is often known as the intercept and ``z`` + as the slope of the power law. + + {0} + + {1} + c, z + Parameters: Log-log slope and intercept + + """ + + def _vals(self, x, c, z): + return c * x**z + +power_law = power_law_gen(name='power_law', parameters='c,z') +power_law.__doc__ = power_law.__doc__.format(_doc_methods, _doc_parameters) + +class mete_sar_gen(curve): + """ + The SAR predicted by the Maximum Entropy Theory of Ecology + + .. math:: + + y = c x^z + + or equivalently + + .. math:: + + \log(y) = \log(c) + z \log(x) + + {0} + + {1} + S0, N0 + Parameters: Initial species richness and community abundance at largest + scale + iterative : bool + If true, SAR calculation for subplots are based on variables for next + larger area instead of initial plot variables. Default False. 
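# The fit_lsq machinery above amounts to minimizing the residuals
# y_obs - c * x**z with scipy.optimize.leastsq. A sketch of the same fit in
# isolation, on made-up synthetic data:
import numpy as np
from scipy import optimize

def residuals(params, x, y_obs):
    c, z = params
    return y_obs - c * x ** z

x = np.array([1., 2., 4., 8., 16., 32.])
y_obs = 3.0 * x ** 0.25              # synthetic power law with c = 3, z = 0.25

params_fit, _, _, msg, ier = optimize.leastsq(
    residuals, np.ones(2), args=(x, y_obs), full_output=True)
# ier <= 4 indicates convergence; params_fit should be close to (3.0, 0.25)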
+ + """ + + def _vals(self, x, S0, N0, iterative=False): + # x is area, y is S + + A0 = x[0] + y = [S0] + + for A in x[1:]: + S1, N1 = self._single_step(S0, N0, A/A0) + y.append(S1) + if iterative: + S0, N0, A0 = S1, N1, A + + return np.array(y) + + def _single_step(self, S0, N0, a): + # if a < 1, solve, if a > 1, guess and check + if a == 1: + S1 = S0 + N1 = N0 + elif a < 1: # "Normal" downscale + pass + else: # Upscale solver + pass + + return S1, N1 + +mete_sar = mete_sar_gen(name='mete_sar', parameters='S0,N0') +mete_sar.__doc__ = mete_sar.__doc__.format(_doc_methods, _doc_parameters) + + + + + + + + + + + + From ede19efa5310d66f76179d8e1d42b81f8f99ac3c Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 27 Mar 2014 14:14:22 -0700 Subject: [PATCH 166/343] Soft wrap long methods and attributes in docstrings to avoid numpydoc error --- macroeco/empirical/empirical.py | 3 +-- macroeco/models/_distributions.py | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index aeb692d..5e73af1 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -83,8 +83,7 @@ class Patch(object): table : dataframe Table of census data recorded in patch meta : ConfigParser obj - Object similar to dict describing data table, loaded from metadata file - at metadata_path and processed by subset + Object similar to dict describing data table, loaded from metadata file at metadata_path and processed by subset subset : str Subset string passed as parameter diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 0418bc8..488757e 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -375,8 +375,7 @@ class nbinom_gen(spdist.nbinom_gen): translate_args(mu, k_agg) not used, returns mu and k_agg. fit_mle(data, k_range=(0.1,100,0.1)) - ml estimate of shape parameters mu and k_agg given data, with k_agg - evaluated at (min, max, step) values given by k_range. + ml estimate of shape parameters mu and k_agg given data, with k_agg evaluated at (min, max, step) values given by k_range. %(before_notes)s mu : float distribution mean @@ -475,8 +474,7 @@ class cnbinom_gen(rv_discrete_meco): translate_args(mu, k_agg) not used, returns mu, k_agg, and b. fit_mle(data, k_range=(0.1,100,0.1)) - ml estimate of shape parameters mu and k_agg given data, with k_agg - evaluated at (min, max, step) values given by k_range. + ml estimate of shape parameters mu and k_agg given data, with k_agg evaluated at (min, max, step) values given by k_range. 
%(before_notes)s mu : float distribution mean From 2be068ec5fcb330ba8f61f2bf02882ebc6b21be9 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 27 Mar 2014 15:02:09 -0700 Subject: [PATCH 167/343] Fix q capitalization error in nbinom stats --- macroeco/models/_distributions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 0aa3a6e..4996f6a 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -443,11 +443,11 @@ def _ppf(self, q, mu, k_agg): def _stats(self, mu, k_agg): p = self._get_p_from_mu(mu, k_agg) Q = 1.0 / p - p = q - 1.0 + p = Q - 1.0 mu = k_agg*p - var = k_agg*p*q - g1 = (q+p)/np.sqrt(k_agg*p*q) - g2 = (1.0 + 6*p*q) / (k_agg*p*q) + var = k_agg*p*Q + g1 = (Q+p)/np.sqrt(k_agg*p*Q) + g2 = (1.0 + 6*p*Q) / (k_agg*p*Q) return mu, var, g1, g2 nbinom = nbinom_gen(name='nbinom', shapes='mu, k_agg') From 83127785bcda1b23dff140e17319cae58d164e38 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 27 Mar 2014 15:03:07 -0700 Subject: [PATCH 168/343] Remove utils, move format modules, mark out old test files --- .../{test_empirical.py => xest_empirical.py} | 0 macroeco/{utils => misc}/form_func.py | 0 macroeco/{utils => misc}/format_data.py | 0 .../xest_form_func.py} | 0 .../xest_format_data.py} | 0 .../distributions_old.py} | 0 .../xest_distributions_old.py} | 0 macroeco/utils/__init__.py | 1 - macroeco/utils/docinherit.py | 69 --- macroeco/utils/global_strings.py | 421 -------------- macroeco/utils/make_metadata.py | 69 --- macroeco/utils/metadata_writer.py | 293 ---------- macroeco/utils/test_metadata_writer.py | 121 ---- macroeco/utils/test_workflow.py | 117 ---- macroeco/utils/workflow.py | 547 ------------------ 15 files changed, 1638 deletions(-) rename macroeco/empirical/{test_empirical.py => xest_empirical.py} (100%) rename macroeco/{utils => misc}/form_func.py (100%) rename macroeco/{utils => misc}/format_data.py (100%) rename macroeco/{utils/test_form_func.py => misc/xest_form_func.py} (100%) rename macroeco/{utils/test_format_data.py => misc/xest_format_data.py} (100%) rename macroeco/{distributions.py => models/distributions_old.py} (100%) rename macroeco/{test_distributions.py => models/xest_distributions_old.py} (100%) delete mode 100644 macroeco/utils/__init__.py delete mode 100644 macroeco/utils/docinherit.py delete mode 100644 macroeco/utils/global_strings.py delete mode 100644 macroeco/utils/make_metadata.py delete mode 100644 macroeco/utils/metadata_writer.py delete mode 100644 macroeco/utils/test_metadata_writer.py delete mode 100644 macroeco/utils/test_workflow.py delete mode 100644 macroeco/utils/workflow.py diff --git a/macroeco/empirical/test_empirical.py b/macroeco/empirical/xest_empirical.py similarity index 100% rename from macroeco/empirical/test_empirical.py rename to macroeco/empirical/xest_empirical.py diff --git a/macroeco/utils/form_func.py b/macroeco/misc/form_func.py similarity index 100% rename from macroeco/utils/form_func.py rename to macroeco/misc/form_func.py diff --git a/macroeco/utils/format_data.py b/macroeco/misc/format_data.py similarity index 100% rename from macroeco/utils/format_data.py rename to macroeco/misc/format_data.py diff --git a/macroeco/utils/test_form_func.py b/macroeco/misc/xest_form_func.py similarity index 100% rename from macroeco/utils/test_form_func.py rename to macroeco/misc/xest_form_func.py diff --git a/macroeco/utils/test_format_data.py b/macroeco/misc/xest_format_data.py similarity 
index 100% rename from macroeco/utils/test_format_data.py rename to macroeco/misc/xest_format_data.py diff --git a/macroeco/distributions.py b/macroeco/models/distributions_old.py similarity index 100% rename from macroeco/distributions.py rename to macroeco/models/distributions_old.py diff --git a/macroeco/test_distributions.py b/macroeco/models/xest_distributions_old.py similarity index 100% rename from macroeco/test_distributions.py rename to macroeco/models/xest_distributions_old.py diff --git a/macroeco/utils/__init__.py b/macroeco/utils/__init__.py deleted file mode 100644 index 8b13789..0000000 --- a/macroeco/utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/macroeco/utils/docinherit.py b/macroeco/utils/docinherit.py deleted file mode 100644 index 0dfe06a..0000000 --- a/macroeco/utils/docinherit.py +++ /dev/null @@ -1,69 +0,0 @@ -# Recipe from http://stackoverflow.com/questions/2025562/ -# inherit-docstrings-in-python-class-inheritance -# -# Import module, then call doc_inherit = DocInherit - -""" -doc_inherit decorator - -Usage: - -class Foo(object): - def foo(self): - "Frobber" - pass - -class Bar(Foo): - @doc_inherit - def foo(self): - pass - -Now, Bar.foo.__doc__ == Bar().foo.__doc__ == Foo.foo.__doc__ == "Frobber" -""" - -from functools import wraps - -class DocInherit(object): - """ - Docstring inheriting method descriptor - - The class itself is also used as a decorator - """ - - def __init__(self, mthd): - self.mthd = mthd - self.name = mthd.__name__ - - def __get__(self, obj, cls): - if obj: - return self.get_with_inst(obj, cls) - else: - return self.get_no_inst(cls) - - def get_with_inst(self, obj, cls): - - overridden = getattr(super(cls, obj), self.name, None) - - @wraps(self.mthd, assigned=('__name__','__module__')) - def f(*args, **kwargs): - return self.mthd(obj, *args, **kwargs) - - return self.use_parent_doc(f, overridden) - - def get_no_inst(self, cls): - - for parent in cls.__mro__[1:]: - overridden = getattr(parent, self.name, None) - if overridden: break - - @wraps(self.mthd, assigned=('__name__','__module__')) - def f(*args, **kwargs): - return self.mthd(*args, **kwargs) - - return self.use_parent_doc(f, overridden) - - def use_parent_doc(self, func, source): - if source is None: - raise NameError, ("Can't find '%s' in parents"%self.name) - func.__doc__ = source.__doc__ - return func diff --git a/macroeco/utils/global_strings.py b/macroeco/utils/global_strings.py deleted file mode 100644 index 11087e1..0000000 --- a/macroeco/utils/global_strings.py +++ /dev/null @@ -1,421 +0,0 @@ -#!/usr/bin/python - -'''This python file contains global strings used in the scripts. Consolidated -in this script for easy maintenance''' - -subset = '''\nYou should examine the columns in your data set and decide if you -would like to subset your data in some particular way before the analysis -begins. It is important to note that only the subsetted data will be analyzed. -For example, if you have a column named 'year' in your data set with values -1998, 1999, and 2000 and you only want to look at the year 2000 for a -particular analysis, you should select the column year from left-hand most -dropdown list, select the == operator from the operator dropdown list and type -2000 in the value field. Similarly, you could use <, >, <=, >=, or != with any -column and value in your data.''' - -criteria = '''\nYou should examine the columns in your dataset and decide if you -would like to divide the data in a particular way for this analysis. 
For -example, if you have a spatial dataset with x,y coordinates and you are -interested in examining macroecological metrics for two separate halves of your -plot along the x coordinate, you could cut the x coordinate in two halves by -giving the 'x' column a value of 2. - -If the column that you would like to divide contains discrete values (e.g. -year), you could enter the keyword 'split' and each unique value will be -analyzed separately. Conversely, the value 'whole' could be given to specify -the entire column. The value 'whole' is equivalent to 1 or leaving the value -blank. If you would like to divide a given column, please select the word -'division' from the dropdown menu and input a value as discussed above.\n\n - -There are four other special words that can be used on a given column: -'species', 'energy', 'count', and 'mass'. When assigned to a column in your -data set, the special word 'species' indicates the column that contains your -species IDs, the special word 'energy' indicates the column that contains some -type of energy measure, the special word 'mass' indicates a column that -contains some type of mass measure, and the special word 'count' indicates the -column that contains your species counts. These special words can be chosen -from the dropdown menu next to each column header. The special word 'species' -MUST be assigned for every analysis. If the special word 'count' is not -assigned, the species counts are all assumed to be one.\n\n - -If there are columns in your data that are not relevant for this analysis leave -the value in the dropdown box as 'NA'. Columns designated 'NA' will not -influence the analysis.\n\n''' - - -rarity_measure = '''\nThis parameter allows you to specify the counts that you -will consider rare. If, for example, you want to know how many species in your -plot have an abundance of 2 or less you would set this parameter to 2. If you -enter more then one value, each value will be examined. Example input: [2] or -[2, 5]. The brackets MUST be included.''' - -SAD_distributions = '''\n -'logser' : Fisher's logseries distribution; -'logser_ut' : Upper-truncated logseries derived from MaxEnt; -'logser_ut_appx' : Approximation for the upper-truncated logseries; -'lognorm' : Lognormal distribution; -'plognorm_lt' : Poisson lognormal distribution with 0 truncated; -'nbd_lt' : Negative binomial distribution with 0 truncated; -'geo_ser' : Geometric series distribution; -'broken_stick' : McArthur's broken stick distribution -'most_even' : The most even SAD -'most_uneven' : The most uneven SAD -'sugihara' : Sugihara's multi-dimensional breakage model.''' - -SSAD_distributions = '''\n -'binm' : Binomial distribution; -'pois' : Poisson distribution; -'nbd' : Negative binomial distribution; -'fnbd' : Finite-negative binomial distribution; -'geo' : Geometric distribution; -'fgeo' : Finite-geometric distribution; -'tgeo' : Truncated geometric distribution derived from MaxEnt''' - -short_subset = '''\nSpecifications for how you want to subset your data before the -analysis. Note that only the subsetted data will be included in the analysis. -The left-hand dropdown box contains all the columns of your dataset and you may -choose one or more to subset. Please see analysis explanation for more detail -and examples.''' - -short_criteria = '''\nSpecifications for how you want to divide your data during -the analysis. The words you see below are the shared columns of your -dataset(s). 
You must designate your species column with the special word -'species' found in the dropdown menu. You are not required to fill any -additional columns for this analysis. Please see analysis explanation for more -detail and examples.''' - -optional = ''' Optional parameter. Default value: ''' - -req = ''' Required parameter.''' - -#### Formatting strings #### - -explanation_string = '''This formatting script loads {0} datasets and -reformats them into columnar data using the parameters that you specify below. -We define columnar data as a dataset that has distinct column headers and has -rows that describe the attributes of a single entity (often a species). For -example, a row could describe the spatial location of a species, the -total number of individuals of that species at that spatial location, -attributes about that location, the date the species was censuses, etc. All of -these atttributes are specified by the column headers. Please see the website -http://www.ctfs.si.edu/plots/summary/ for examples of columnar data. - -''' - -output_string = '''This formatting script outputs a formatted csv data file to -specified folder within ../macroeco/data/formatted. You can specify the name -of the output formatted file(s). If you do not, the script will hard code them -with the script name, run name, and some appended string. - -''' - -process_string = ''' -The formatting process is as follows: - -1. The specified {0} data is loaded\n -2. Any {0} data-specific formatting parameters are applied to the {0} -data\n -3. The {0} data is converted into columnar data\n -4. Any columnar data-specific formatting parameters are applied to the columnar -data\n -5. The columnar data is output\n -''' - -delimiter = '''\nThe file delimiter used in the data files. - -Example input: - -1. [','] - -Where ',' is the file delimiter. - -2. ['+'] - -Where '+' is the file delimiter. - -The brackets and quotes MUST be include''' - -missing_values_from_a_given_column = '''\nSpecifies what is a -missing data value in any given column in the data set. The input must be -formatted as a pythonic dictionary. - -Example input: - -1. {'count' : 'NA', 'year' : ''} - -This input says that the data column 'count' has missing values 'NA' and the -data column 'year' has missing values '' (blank). The brackets and semicolons -are required for this parameter''' - - -delete_missing_values = '''\nEither True or False. If True, the missing values -specified in the missing_values_from_a_given_column parameter are removed from -the formatted data (your archival data remains unchanged). If False, only NaN -values are removed from the formatted data. - -Chose either: True or False.''' - -subset = '''\nA permanent subset to the formatted data, {'column_name': -'condition'}, which will limit all analysis to records in which column_name -meets the condition. The condition is a formatted as ('comparison operator', -'value'). Possible comparison operators are '==', '!=', '<, '>', '<=', '>='. -Please note that your archival data will remain unchanged. - -Subsetting examples: - -1. {'year': ('==' , 2005), 'x': [('>' , 20), ('<' , 40)]} - -Restricts analysis to year 2005 and x values between 20 and 40. Note that for -multiple conditions for a column square brackets MUST be included -(i.e. x : [('>', 20), ('<', 40)]). For a single condition on a column they are -optional (i.e. 'year': ('==', 2005)). - -2. {'name' : ('==', 'John')} - -Includes only rows in which column 'name' equals 'John'. 
When subsetting on a -string, the string should be quoted (i.e. ('==', 'John')) ''' - -columns_to_split = '''\nUse this if you want to split your single dataset into -multiple datasets based on given column names. For example, if you have a -dataset with column names ('x1', 'x2', 'x3','g', 'h') and you want to make -three datasets with column names ('x1', 'g', 'h'), ('x2', 'g', 'h'), and ('x3', -'g', 'h') you could type ['x1', 'x2', 'x3'] and your single data would be made -into three datasets with the columns given above. Notice that ALL columns that -are not specified are included in each new dataset. - -Example input: - -1. ['x1', 'x2', 'x3'] OR [('x1',) ('x2',), ('x3',)] - -Makes three datasets where each one contains only one of the specified columns. -All columns that are not specified are included. The brackets ([]) MUST be -included. - -2. [('x1', 'y1'), ('x2', y2'), ('x3', 'y3'), ('x4', 'y4')] - -Makes four datasets where each data set contains only one of the above pairs -x,y pairs. For example, the first data set would have columns ('x1', 'y1', ... -all unspecified columns) but it would not have columns 'x2', 'x3', 'x4, 'y2', -'y3', or 'y4'. ''' - -change_column_names = '''\nSpecifies the column names that you wish to change and -the names that you wish to change them to. This parameter is useful if you -wish to merge data sets. - -Example input: - -1. (['fred', 'jane'], ['mark', 'mary']) or ['fred', 'jane'], ['mark', 'mary'] - -Changes column 'fred' to 'mark' and column 'jane' to 'mary' in all datasets. -The brackets are required. - -2. ([('x1', 'x2', 'x3'), 'h1'], ['x', 'h']) - -Changes columns 'x1', 'x2', 'x3' to 'x' and column 'h1' to 'h'. All -brackets are required.''' - -add_column_names_and_values = '''\nSpecifies additional columns that you want to -add to the data and the values the column will take for each dataset. - -Example input: - -1. {'year' : (1998, 1999, 2000), 'name' : ('Fred', 'George', 'Ron')} - -Adds the column 'year' and 'name' to all datasets. In this example, there are -three data sets and the values of 'year' for the first, second, and third -dataset are set to 1998, 1999, and 2000, respectively. Similarly, the values of -column 'name' for the first, second, and third dataset are set to 'Fred', -'George', and 'Ron', respectively. The length of values to be assigned (i.e. -(1998, 1999, 2000)) must equal the number of datasets or be one. All brackets and -punctuation must be included - -2. {'year' : 1998} - -Adds the columns 'year' with a value of 1998 to all datasets being -considered. - -''' -names_of_columns_to_be_removed = '''\nRemove any number of columns from the -dataset by specifying the column names. - -Example Input: - -1. 'name' - -Removes the column 'name' from all data sets - -2. ['name', 'species', 'date'] - -Remove the columns 'name', 'species', and 'date' from all data sets -''' - -merge_data = '''\nEither Y/yes or N/no. If Y/yes, attempts to merge all of the -data into one dataset. If the merge is successful, only the single merged data -file will be output. If the merge cannot be completed an error will be -displayed. 
If N/no, no merge will be attempted and all datasets will be -output.''' - -columnar_params_full =\ -''' -*** delimiter *** - -{0} - -*** missing_values_from_a_given_column *** - -{1} - -*** delete_missing_values *** - -{2} - -*** columns_to_split *** - -{3} - -*** change_column_names *** - -{4} - -*** add_column_names_and_values *** - -{5} - -*** names_of_columns_to_be_removed *** - -{6} - -*** merge_data *** - -{7} - -*** subset *** - -{8} - -'''.format(delimiter, missing_values_from_a_given_column, -delete_missing_values, columns_to_split, change_column_names, -add_column_names_and_values, names_of_columns_to_be_removed, merge_data, -subset) - -columnar_params_med =\ -''' -*** delimiter *** - -{0} - -*** columns_to_split *** - -{1} - -*** change_column_names *** - -{2} - -*** add_column_names_and_values *** - -{3} - -*** names_of_columns_to_be_removed *** - -{4} - -*** merge_data *** - -{5} - -*** subset *** - -{6} - -'''.format(delimiter, columns_to_split, change_column_names, -add_column_names_and_values, names_of_columns_to_be_removed, merge_data, -subset) - -columnar_params_small =\ -''' -*** columns_to_split *** - -{0} - -*** change_column_names *** - -{1} - -*** add_column_names_and_values *** - -{2} - -*** names_of_columns_to_be_removed *** - -{3} - -*** merge_data *** - -{4} - -*** subset *** - -{5} - -'''.format(columns_to_split, change_column_names, -add_column_names_and_values, names_of_columns_to_be_removed, merge_data, -subset) - - - - -def check_columnar_params(params, script): - '''This function checks that all of the parameters required to convert - columnar data have the correct types. This test does not completely - validate parameters. Just check the first level type. - - Parameters - ---------- - params : dict - Parameter dictionary - script : str - Either 'grid', 'dense', 'columnar', or 'transect'. - - ''' - - # Can't check names_of_columns_to_be_removed because it can be a string. - if script == 'grid': - prms_types = [('columns_to_split', type([])), - ('change_column_names', type((2,))), - ('add_column_names_and_values', type({})), - ('merge_data', str), - ('subset', type({}))] - - elif script != 'columnar': - prms_types = [('delimiter' , type([])), - ('columns_to_split', type([])), - ('change_column_names', type((2,))), - ('add_column_names_and_values', type({})), - ('merge_data', str), - ('subset', type({}))] - - else: - prms_types = [('delimiter' , type([])), - ('missing_values_from_a_given_column', type({})), - ('delete_missing_values', type(True)), - ('columns_to_split', type([])), - ('change_column_names', type((2,))), - ('add_column_names_and_values', type({})), - ('merge_data', str), - ('subset', type({}))] - - for i, pair in enumerate(prms_types): - - if type(params[pair[0]]) != pair[1]: - if params[pair[0]] != None: - raise TypeError("Parameter '%s' must be a %s not a %s." % (pair[0], - str(pair[1]), str(type(params[pair[0]]))) + - " Please check the formatting of '%s': %s " % (pair[0], - str(params[pair[0]]))) - - - - - - diff --git a/macroeco/utils/make_metadata.py b/macroeco/utils/make_metadata.py deleted file mode 100644 index 1277880..0000000 --- a/macroeco/utils/make_metadata.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python - -''' -Makes minimal metadata for the user -''' - -import metadata_writer -import sys - - -#The user may want to make minimal metadata for multiple files -if len(sys.argv) == 1: - print "No data files included. 
Minimal metadata not made" -else: - for i in xrange(len(sys.argv)): - if i > 0: - metawriter = metadata_writer.MetaWriter(sys.argv[i]) - traitlist = [] - typelist = [] - print "Examining file '" + metawriter.filename + "'..." - for name in metawriter.column_names: - cat = raw_input("Is column name '" + name +\ - "' categorical? ") - if cat == "No" or cat == "no" or cat == "n" or\ - cat == "N": - types = (name, {'cat' : False}) - typelist.append(types) - spatial = raw_input("Is column name '" + name +\ - "' spatially explicit? ") - if spatial == "Yes" or spatial == "yes" or spatial == 'Y'\ - or spatial == 'y': - while True: - minimum = raw_input("Please enter the minimum value" +\ - " of column '" + name + "': ") - maximum = raw_input("Please enter the maximum value" +\ - " of column '" + name + "': ") - precision = raw_input("Please enter the precision" +\ - " of column '" + name + "': ") - try: - minimum = float(minimum) - maximum = float(maximum) - precision = float(precision) - break #This might not work - except ValueError: - print "Maximum, minimum, and precision must all" +\ - " be real numbers" - traits = (name, {'precision': str(precision), - 'minimum' : str(minimum),\ - 'maximum' : str(maximum)}) - traitlist.append(traits) - - else: - types = (name, {'cat' : True}) - typelist.append(types) - - metawriter.add_attribute_types(typelist) - metawriter.add_attribute_traits(traitlist) - metawriter.write_meta_data() - - - - - - - - - - - diff --git a/macroeco/utils/metadata_writer.py b/macroeco/utils/metadata_writer.py deleted file mode 100644 index aaa8eaa..0000000 --- a/macroeco/utils/metadata_writer.py +++ /dev/null @@ -1,293 +0,0 @@ -#!/usr/bin/env python - -''' -This module contains a minimal metadata writer class for quickly making -metadata - -''' - - -import xml.etree.ElementTree as ET -import os - -sub = ET.SubElement - - -class MetaWriter: - ''' - Writes a metadata file based on the given filename and user input - - ''' - - def __init__(self, datapath): - ''' - Class takes in a datafile path name and creates an xml tree using the - column heading of the recarray generated from the csv file. 
- - Parameters - ---------- - datapath : string - Datafile name - - ''' - assert datapath[-4:] == '.csv', "%s must end in .csv" % (datapath) - self.filename = datapath.split('.')[0] - fin = open(datapath, 'r') - self.column_names = fin.readline().strip().split(',') - fin.close() - self.root = ET.Element('eml:eml') - self.root.attrib = {'packageId' : self.filename, 'system' : 'knb', - "xmlns:eml" : "eml://ecoinformatics.org/eml-2.1.0", 'xmlns:xsi': - "http://www.w3.org/2001/XMLSchema-instance", "xsi:schemaLocation" - : "eml://ecoinformatics.org/eml-2.1.0 eml.xsd"} - self.dataset = sub(self.root, 'dataset') - self.title = sub(self.dataset, 'title') - self.title.text = "Data set " + os.path.split(datapath)[1] - - self.creator = sub(self.dataset, 'creator') - self.individualName = sub(self.creator, 'individualName') - self.surName = sub(self.individualName, 'surName') - self.surName.text = "None" - - self.contact = sub(self.dataset, 'contact') - self.individualName2 = sub(self.contact, 'individualName') - self.surName2 = sub(self.individualName2, 'surName') - self.surName2.text = "None" - - self.dataTable = sub(self.dataset, 'dataTable') - - self.entityName = sub(self.dataTable, 'entityName') - self.entityName.text = os.path.split(datapath)[1] - - self.physical = sub(self.dataTable, 'physical') - self.objectName = sub(self.physical, 'objectName') - self.objectName.text = os.path.split(datapath)[1] - self.size = sub(self.physical, 'size') - self.size.attrib = {'unit' : "byte"} - self.size.text = str(os.path.getsize(datapath)) - - # Nested in physical - self.dataFormat = sub(self.physical, 'dataFormat') - self.textFormat = sub(self.dataFormat, 'textFormat') - self.numHeaderLines = sub(self.textFormat, 'numHeaderLines') - self.numHeaderLines.text = "1" - self.recordDelimiter = sub(self.textFormat, 'recordDelimiter') - self.recordDelimiter.text = "#x0A" - self.attributeOrientation = sub(self.textFormat, 'attributeOrientation') - self.attributeOrientation.text = "column" - self.simpleDelimited = sub(self.textFormat, 'simpleDelimited') - self.fieldDelimiter = sub(self.simpleDelimited, 'fieldDelimiter') - self.fieldDelimiter.text = "," - - self.distribution = sub(self.physical, 'distribution') - self.online = sub(self.distribution, 'online') - self.url = sub(self.online, 'url') - self.url.text = "macroeco://" + os.path.split(datapath)[1] - - - self.attributeList = sub(self.dataTable, 'attributeList') - self.attributes = [] - self.attributeTypes = [] - for i, name in enumerate(self.column_names): - attribute = sub(self.attributeList, 'attribute') - attributeName = sub(attribute, 'attributeName') - attributeDefinition = sub(attribute, 'attributeDefinition') - attributeDefinition.text = "None" - measurementScale = sub(attribute, 'measurementScale') - - # Default Ordinal - attributeType = sub(measurementScale, 'ordinal') - nonNumericDomain = sub(attributeType,'nonNumericDomain') - textDomain = sub(nonNumericDomain, 'textDomain') - definition = sub(textDomain, 'definition') - definition.text = "None" - - attributeName.text = name - self.attributes.append(attribute) - self.attributeTypes.append(attributeType) - - self.numberOfRecords = sub(self.dataTable, 'numberOfRecords') - self.numberOfRecords.text = "Unknown" - - def add_attribute_types(self, typelist): - ''' - Sets the type of the attribute to either ordinal (categorical) or - interval (categorical). Initialized in constructor as ordinal. - - Parameters - ---------- - typelist : list - A list of tuples. 
Each tuple contains 2 elements: a string and a - dict. The dict must contain the keyword cat (categorical) or a - KeyError will be thrown. - - Example of typelist: - - [('x', {'cat' : True}), ('y' : {'cat' : True}), ('year', - {'cat' : False}] - - ''' - - for item in typelist: - for attribute in self.attributes: - tree = ET.ElementTree(attribute) - att = tree.findall('attributeName')[0] - if (att.text == item[0]): - measure = tree.findall('measurementScale')[0] - if item[1]['cat'] == True: - if len(measure.findall('interval')) == 1: - measure.remove(measure.find('interval')) - att_type = sub(measure, 'ordinal') - nonNumericDomain = sub(att_type,'nonNumericDomain') - textDomain = sub(nonNumericDomain, 'textDomain') - definition = sub(textDomain, 'definition') - definition.text = "None" - - elif len(measure.findall('ordinal')) == 1: - measure.remove(measure.find('ordinal')) - att_type = sub(measure, 'ordinal') - nonNumericDomain = sub(att_type,'nonNumericDomain') - textDomain = sub(nonNumericDomain, 'textDomain') - definition = sub(textDomain, 'definition') - definition.text = "None" - - elif item[1]['cat'] == False: - - if len(measure.findall('ordinal')) == 1: - measure.remove(measure.find('ordinal')) - att_type = sub(measure, 'interval') - unit = sub(att_type, 'unit') - standardUnit = sub(unit, 'standardUnit') - standardUnit.text = "dimensionless" - precision = sub(att_type, 'precision') - precision.text = "0" - numericDomain = sub(att_type, 'numericDomain') - numberType = sub(numericDomain, 'numberType') - numberType.text = 'natural' - - - elif len(measure.findall('interval')) == 1: - measure.remove(measure.find('interval')) - att_type = sub(measure, 'interval') - unit = sub(att_type, 'unit') - standardUnit = sub(unit, 'standardUnit') - standardUnit.text = "dimensionless" - precision = sub(att_type, 'precision') - precision.text = "0" - numericDomain = sub(att_type, 'numericDomain') - numberType = sub(numericDomain, 'numberType') - numberType.text = 'natural' - - def add_attribute_traits(self, traitlist): - ''' - Adds traits to the attributes contained in self.attributes as specified - by the traitlist. Traitlist is a list of tuples with each tuple - containting two elements: the attribute name (string) and a dictionary - of traits to be added to the attribute. If the type of the trait - ordinal, nothing will be changed. Only traits with type interval will - be appened too. - - Parameters - ---------- - traitlist : list - A list of 2 element tuples where the first element contains a - string and the second element conatins a dict. See example in - docstring. The only keywords that are recognized are maximum, - minimum, and precision. - - Example of traitlist: - - [('x', {'minimum' : '0', 'maximum' : '100'}), ('y', {'precision' : - '0.1'})] - - ''' - - for item in traitlist: - for attribute in self.attributes: - tree = ET.ElementTree(attribute) - child = tree.findall('attributeName')[0] - if child.text == item[0]: - #TODO:Cleaner way to do this than with if? 
- measure = tree.findall('measurementScale')[0] - if len(measure.findall('interval')) == 1: - interval = measure.findall('interval')[0] - for key in item[1].iterkeys(): - if key == 'precision': - prec = interval.findall('precision') - if len(prec) == 0: - precision = sub(interval, 'precision') - precision.text = str(item[1][key]) - elif len(prec) == 1: - prec[0].text = str(item[1][key]) - elif key == 'minimum': - numericDomain =\ - interval.findall('numericDomain')[0] - bnd = numericDomain.findall('bounds') - if len(bnd) == 0: - bounds = sub(numericDomain, 'bounds') - minimum = sub(bounds, 'minimum') - minimum.attrib = {'exclusive' : - 'false'} - minimum.text = str(item[1][key]) - elif len(bnd) == 1: - mins = bnd[0].findall('minimum') - if len(mins) == 0: - minimum = sub(bnd[0], 'minimum') - minimum = sub(bnd[0], 'minimum') - minimum.attrib = {'exclusive' : - 'false'} - minimum.text = str(item[1][key]) - elif len(mins) == 1: - bnd[0].remove(mins[0]) - minimum = sub(bnd[0], 'minimum') - minimum.attrib = {'exclusive' : - 'false'} - minimum.text = str(item[1][key]) - elif key == 'maximum': - numericDomain =\ - interval.findall('numericDomain')[0] - bnd = numericDomain.findall('bounds') - if len(bnd) == 0: - bounds = sub(numericDomain, 'bounds') - maximum = sub(bounds, 'maximum') - maximum.attrib = {'exclusive' : - 'false'} - maximum.text = str(item[1][key]) - elif len(bnd) == 1: - maxs = bnd[0].findall('maximum') - if len(maxs) == 0: - maximum = sub(bnd[0], 'maximum') - maximum.attrib = {'exclusive' : - 'false'} - maximum.text = str(item[1][key]) - elif len(maxs) == 1: - bnd[0].remove(maxs[0]) - maximum = sub(bnd[0], 'maximum') - maximum.attrib = {'exclusive' : - 'false'} - maximum.text = str(item[1][key]) - - - - def write_meta_data(self, name=None): - ''' - Writes out the xml tree that is contained in self.root and saves and - .xml file in the currect working directory under the given filename. If - no name is given save the xml as the same name as the input file. 
- - - ''' - - tree = ET.ElementTree(self.root) - if name == None: - tree.write(self.filename + '.xml') - else: - tree.write(name + '.xml') - - - - - - - - diff --git a/macroeco/utils/test_metadata_writer.py b/macroeco/utils/test_metadata_writer.py deleted file mode 100644 index 593f5c7..0000000 --- a/macroeco/utils/test_metadata_writer.py +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/python -#Testing metadata_writer.py - -import unittest -from metadata_writer import * -import numpy as np -import xml.etree.ElementTree as ET - -import os -gcwd = os.getcwd #get current directory -pd = os.path.dirname #get parent directory -chdir = os.chdir #change directories -jp = os.path.join - -class TestMetadataWriter(unittest.TestCase): - '''Tests the MetaWriter class''' - - def setUp(self): - - self.meta1 = open('meta1.csv', 'w') - self.meta1.write('''cell,row,column,spp,year - 0, 1, 2, 3, 4''') - self.meta1.close() - - def tearDown(self): - os.remove('meta1.csv') - - def test_metawriter(self): - mt = MetaWriter('meta1.csv') - att = [('row', {'maximum' : '2', 'minimum' : '0', 'precision' :\ - '0.1'}), ('column', {'maximum' : '45', 'minimum' :\ - '0', 'precision' : '1'})] - - # Check that all attributes are in xml tree - self.assertTrue(len(mt.attributeList.findall('./')) == 5) - - # Check that all types are ordinal by default - measure = mt.attributeList.findall('./attribute/measurementScale') - for i, m in enumerate(measure): - temp = m.findall('./') - self.assertTrue(temp[0].tag == 'ordinal') - - # Check that it adds correct attribute types - types = [('cell', {'cat' : True}), ('row', {'cat' : False}), ('column', {'cat' - : False}), ('spp' , {'cat' : True})] - - mt.add_attribute_types(types) - order = ['ordinal', 'interval', 'interval', 'ordinal', 'ordinal'] - for i, att in enumerate(mt.attributes): - temp = att.findall('./measurementScale/' + order[i]) - self.assertTrue(len(temp) == 1) - - # Check that it overwrites types if they are changed - types = [('cell', {'cat' : False}), ('row', {'cat' : True}), ('column', {'cat' - : True}), ('spp' , {'cat' : False})] - - mt.add_attribute_types(types) - - mt.add_attribute_types(types) - order = ['interval', 'ordinal', 'ordinal', 'interval', 'ordinal'] - for i, att in enumerate(mt.attributes): - temp = att.findall('./measurementScale/' + order[i]) - self.assertTrue(len(temp) == 1) - - # Check that max, min and precision are set correctly - - types = [('cell', {'cat' : True}), ('row', {'cat' : False}), ('column', {'cat' - : False}), ('spp' , {'cat' : True})] - - mt.add_attribute_types(types) - - att_list = [('row', {'minimum' : 0, 'maximum' : 400, 'precision' : 3, - 'random' : 'harry'}), ('column', {'maximum' : 5}), ('spp', {'precision' - : 4})] - - mt.add_attribute_traits(att_list) - - # spp should have no precision even though we tried to add it - have = mt.attributeList.findall(".//attributeName") - names = [nm.text for nm in have] - ind = names.index('spp') - maybe =\ - mt.attributes[ind].findall('./measurementScale/ordinal/precision') - self.assertTrue(len(maybe) == 0) - - # cell should have no precision - have = mt.attributeList.findall(".//attributeName") - names = [nm.text for nm in have] - ind = names.index('cell') - maybe =\ - mt.attributes[ind].findall('./measurementScale/ordinal/precision') - self.assertTrue(len(maybe) == 0) - - # Precision of row should be three - have = mt.attributeList.findall(".//attributeName") - names = [nm.text for nm in have] - ind = names.index('row') - maybe =\ - 
mt.attributes[ind].findall('./measurementScale/interval/precision') - self.assertTrue(maybe[0].text == "3") - - # Precision of column should be 0 - have = mt.attributeList.findall(".//attributeName") - names = [nm.text for nm in have] - ind = names.index('column') - maybe =\ - mt.attributes[ind].findall('./measurementScale/interval/precision') - self.assertTrue(maybe[0].text == "0") - - # Maximum is set right - have = mt.attributeList.findall(".//attributeName") - names = [nm.text for nm in have] - ind = names.index('column') - maybe =\ - mt.attributes[ind].findall('./measurementScale/interval/numericDomain/bounds/maximum') - self.assertTrue(maybe[0].text == "5") - - - -if __name__ == '__main__': - unittest.main() diff --git a/macroeco/utils/test_workflow.py b/macroeco/utils/test_workflow.py deleted file mode 100644 index 0aeb411..0000000 --- a/macroeco/utils/test_workflow.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/python - -import os -import unittest -from macroeco.utils import workflow - - -# Cases for future testing: -# params file has one interactive run, user changes values. -# params file has plural interactive runs (complicated dialog ahoy). -# No params file. Dialog, write, reload. -# Params file doesn't match ask. Dialog, write, reload, check against ask. -## workflow.xml proper subset of ask -## Neither a proper subset of the other -# Types of param: string, int, float, lists of those; mixed-type list (ick). - - -class ParamfileTestCase(unittest.TestCase): - - def setUp(self): - self.cwd = os.getcwd() + '/' - - self.pf = open('parameters.xml', 'w') - self.pf.write(""" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - """) - self.pf.close() - - def tearDown(self): - pass - os.remove(workflow.paramfile) - - def test_emptyask(self): - pa = workflow.Parameters('nonexistantrun', None, {}, {}) - self.assertEqual(pa.params, {}) - self.assertEqual(pa.interactive, False) - - def test_NIrunExists(self): - req_params = {'size': 'descripsize', 'species': 'descripspp'} - pa = workflow.Parameters('RunExists', None, req_params, {}) - self.assertTrue(len(pa.params) == 1) - self.assertTrue(set(req_params.keys()).issubset(\ - set(pa.params['ParamfileTestCase'].keys()))) - self.assertTrue(pa.interactive == False) - - run = pa.params['ParamfileTestCase'] - self.assertTrue(run['size']*run['layers'][1] == 3*4.4) - - def test_MultipleNIRunsExist(self): - pa = workflow.Parameters('ManyNIRuns', None, {}, {}) - self.assertEqual(len(pa.params), 2) - self.assertEqual(pa.params['FirstCase']['size'], 4.4) - self.assertEqual(pa.params['FirstCase']['species'], 'E. coli') - self.assertEqual(pa.params['FirstCase']['layers'], [0,3,12]) - self.assertEqual(pa.params['SecondCase']['size'], 2.2) - self.assertEqual(pa.params['SecondCase']['species'], 'H. sapiens') - self.assertEqual(pa.params['SecondCase']['layers'], [5]) - - def test_UnnamedRunErrors(self): - pa = workflow.Parameters('Unnamed', None, {}, {}) - self.assertEqual(len(pa.params), 2) - self.assertEqual(pa.params['run1']['size'], 4.4) - self.assertEqual(pa.params['run1']['species'], 'E. coli') - self.assertEqual(pa.params['run1']['layers'], [0,3,12]) - self.assertEqual(pa.params['run2']['size'], 2.2) - self.assertEqual(pa.params['run2']['species'], 'H. 
sapiens') - self.assertEqual(pa.params['run2']['layers'], [5]) - - def test_InteractiveRun(self): - pa = workflow.Parameters('Interactive', None, {}, {}) - self.assertTrue(pa.interactive == True) - diff --git a/macroeco/utils/workflow.py b/macroeco/utils/workflow.py deleted file mode 100644 index 77fa462..0000000 --- a/macroeco/utils/workflow.py +++ /dev/null @@ -1,547 +0,0 @@ -#!/usr/bin/python - -''' -Manages the details of a reproducible workflow within macroeco. Main Workflow -class is called with one argument, required_params, and the surrounding script -must be called with a single sys.argv with the output directory. - -Classes -------- -- `Workflow` -- tracks the analysis, data requested, and parameters; maps sites -- `Parameters` -- finds/asks for and stores run names and parameters -''' - -import xml.etree.ElementTree as etree -import sys, os, logging -import matplotlib.pyplot as plt -from macroeco.data import Metadata - - -paramfile = 'parameters.xml' # Parameter file found in output dir -logfile = 'logfile.txt' # Logfile to save in output dir - - -class Workflow: - ''' - Manages the details of a reproducible workflow with macroeco scripts. - - Arguments - --------- - required_params : dictionary - Parameters needed for analysis, in form of - 'parameter_name':'short_description'. All of these parameters must be - present in params file in output directory, or analysis will not run. - This argument is empty only when no data or parameters are required for - a script to run. - clog : bool - Whether to log to console in addition to file, False by default - short_output_name : bool - Whether to use the run-name alone to name output. False by default. - - Attributes - ---------- - script_name : string - Name of script originating the workflow - output_path : string - Path to output directory - interactive : bool - Whether the script can pause for user interaction - runs : dict - If parameters are needed, sets of parameter values are named runs - ''' - - def __init__(self, required_params={}, optional_params={}, - clog=False, svers=None, short_output_name=False): - - # Store script name from command line call - script_path, script_extension = os.path.splitext(sys.argv[0]) - self.script_name = os.path.split(script_path)[-1] - self.script_vers = svers - self.short_output_name = short_output_name - - # Store output directory path - contains params file, log, results - # TODO: Make more robust to non-absolute path entries - output_path = os.getcwd() - self.output_path = output_path - - # Prepare logger - logging.basicConfig(filename=logfile, # Add file logging - level=logging.INFO, format=('%(asctime)s | ' - '%(levelname)s | %(filename)s:%(lineno)d | ' - '%(message)s'), datefmt='%H:%M:%S') - - if clog: # Add console logging - console = logging.StreamHandler() - console.setLevel(logging.INFO) - format = logging.Formatter('%(levelname)-8s %(message)s') - console.setFormatter(format) - logging.getLogger('').addHandler(console) - - def excepthook(*args): # Catch errors to log - logging.getLogger().error('Analysis Stopped', exc_info=args) - else: - def excepthook(*args): # Catch errors to log + stderr - logging.getLogger().error('Analysis Stopped', exc_info=args) - sys.__excepthook__(*args) # Show err in console if clog False - - sys.excepthook = excepthook # Define error handler as above - - logging.captureWarnings(True) # Catch warnings - - logging.debug('Creating workflow object') - - # Get parameters from file, including data paths - assert type(required_params) == type({}), ('Required 
params must be a' - ' dict.') - self.parameters = Parameters(self.script_name, self.script_vers, - required_params, optional_params) - self.interactive = self.parameters.interactive - - - def single_datasets(self): - ''' - Generator that yields data files and descriptive parameters. - - Special parameter 'data_path' is a list of locations of data files to - use for analysis - if present, map of sites will be generated for each - run. - - Yields - ------ - data_path : string - Path to data to analyze, relative to current working directory - output_ID : string - Concatenates script, run, and dataset identifiers - run_params : dict - Dictionary of parameters for each script_name and run - ''' - - def clean_name(fp): # Extract file name from path - return os.path.splitext(os.path.split(fp)[-1])[0] - - # Run script on all runs (parameter sets), and data sets - for run_name in self.parameters.params.keys(): - # TODO: Check for output_ID conflicts (must be unique) - - # Check if data_paths in params. If not, add one empty data_path - # for the loop below. If so, make a map. - if len(self.parameters.data_path) == 0: - logging.debug(('No data paths given for run %s, no map of ' - 'sites created') % run_name) - self.parameters.data_path[run_name] = [''] - else: - make_map(self.parameters.data_path[run_name], run_name) - - # Loop through each dataset and yield values for dataset and run - for data_path in self.parameters.data_path[run_name]: - abs_data_path = os.path.abspath(os.path.join(self.output_path, - data_path)) - if self.short_output_name: - print('Using short output name in single_datasets:') - output_ID = run_name - print(output_ID) - else: - output_ID = '_'.join([self.script_name, - run_name, clean_name(data_path)]) - logging.info('Beginning %s' % output_ID) - yield (abs_data_path, output_ID, - self.parameters.params[run_name]) - - def all_datasets(self): - ''' Generator that yields a list of data files and descriptive - parameters for each run. - - Yields - ------ - data_path : list - A list of paths to data to analyze - output_ID : list - A list of IDs that concatenate script, run, and dataset. - run_params : dict - Dictionary of parameters for each script_name and run - - ''' - - def clean_name(fp): # Extract file name from path - return os.path.splitext(os.path.split(fp)[-1])[0] - - # Run script on all runs (parameter sets), and data sets - for run_name in self.parameters.params.keys(): - # TODO: Check for output_ID conflicts (must be unique) - - # Check if data_paths in params. If not, add one empty data_path - # for the loop below. If so, make a map. - if len(self.parameters.data_path) == 0: - logging.debug(('No data paths given for run %s, no map of ' - 'sites created') % run_name) - self.parameters.data_path[run_name] = [''] - else: - make_map(self.parameters.data_path[run_name], run_name) - - abs_data_paths = [os.path.abspath(os.path.join(self.output_path, - data_path)) for data_path in self.parameters. 
- data_path[run_name]] - if self.short_output_name: - print('Using short output name in all_datasets:') - output_IDs = ['_'.join([run_name, clean_name(data_path)]) for - data_path in self.parameters.data_path[run_name]] - print(output_IDs) - else: - output_IDs = ['_'.join([self.script_name, run_name, - clean_name(data_path)]) for data_path in - self.parameters.data_path[run_name]] - logging.info('Beginning %s script' % self.script_name) - yield (abs_data_paths, output_IDs, - self.parameters.params[run_name], run_name, - self.script_name) - - - - -class Parameters: - ''' - Load parameters from parameter file in current working directory - and make available as self.params. - Checks that all required_params are present and loaded. - - Arguments - --------- - script_name : string - Name of script originating the workflow - required_params : dictionary - Parameters needed for analysis, in form of - 'parameter_name':'short_description'. All of these parameters must be - present in params file in output directory for this script_name and - run, or analysis will not run. This argument is empty only when no data - or parameters are required for a script to run. - - Attributes - ---------- - script_name : string - Name of script originating the workflow - script_vers : string - Version of script originating the workflow - interactive : bool - Whether the script can pause for user interaction - params : dict - Dictionary of dictionaries, with each outer key a run name and each - outer value a dictionary of parameter names and values for each run. - data_path : dict - Dictonarity where keys are run names and values are lists of data paths - associated with each run. - - ''' - - def __init__(self, script_name, script_vers, required_params, - optional_params, output_path=False): - - # Store initial attributes - self.script_name = script_name - self.script_vers = script_vers - self.interactive = False - self.params = {} - self.data_path = {} - if not output_path: - output_path = os.getcwd() - - # Check that parameter file exists, if not use default values - try: - pf = open(paramfile, 'r') - pf.close() - except IOError: - logging.info(('No parameter file found at %s, proceeding without ' - 'parameters') % output_path) - self.params[''] = {} - self.data_path[''] = {} - self.interactive = False - return - - # Read parameter file - logging.info('Reading parameters from %s' % os.path.join(output_path, - paramfile)) - self.read_from_xml() - - # Check that all required parameters present in all runs - if not self.required_params_present(required_params): - raise IOError('Required parameters missing') - - # If optional params are missing, set to default - self.set_optional_params(optional_params, script_name) - - logging.info('Parameters: %s' % str(self.params)) - logging.info('Data: %s' % str(self.data_path)) - - # Evaluate param values into appropriate types - self.eval_params() - - - def read_from_xml(self): - ''' Read parameters from xml file into self.params dictionary. ''' - - # Define class for checking keys - class AllEntities: - def __getitem__(self, key): - return key - - # Declare parser object - # TODO: Without next line, works in iPython, console, not script ?? 
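# The intent of the parser setup just below appears to be to make expat
# tolerant of undeclared XML entities in parameters.xml: UseForeignDTD(True)
# lets expat accept entity references that no DTD defines, and assigning the
# AllEntities object defined above to parser.entity resolves any such entity
# to its own name instead of raising a ParseError.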
- parser = etree.XMLParser() - parser.parser.UseForeignDTD(True) - parser.entity = AllEntities() - - # Try to open paramfile from output_path - # TODO: Integration test - try: - pml = etree.parse(paramfile, parser=parser).getroot() - except etree.ParseError: - raise IOError('ParseError trying to read %s' % paramfile) - except: - raise - - # Create params dictionary - if len(pml) == 0: # Error if no analyses in param file - raise IOError('Parameter file %s contains no valid analyses' % - paramfile) - for analysis in pml: # Loop analyses looking for script_name - if analysis.get('script_name') == self.script_name: - - if 'version' in analysis.attrib: # Set version - vers = analysis.get('version') - if self.script_vers: # If got script_vers, check - if float(vers) != float(self.script_vers): - logging.warning(('Script version does not match ' - 'version in parameters. ' - 'Continuing, but may fail.')) - - if 'interactive' in analysis.attrib: # Set interactive - ia = analysis.get('interactive') - if ia in ['T', 'True', 't', 'true']: - self.interactive = True - else: - self.interactive = False - else: - self.interactive = False - - if len(analysis) == 0: # Error if no runs in analysis - raise IOError(('Analysis found for this script, but no ' - 'valid runs found')) - - run_counter = 1 - for run in analysis.getchildren(): # Loop runs - run_name = run.get('name') - if run_name is None: - run_name = 'run' + str(run_counter) - run_counter += 1 - self.params[run_name] = {} - self.data_path[run_name] = [] - for elt in run.getchildren(): # Loop params in run - if elt.tag == 'param': - param = elt.get('name') - value = elt.get('value') - self.params[run_name][param] = value - if elt.tag == 'data': - data_type = elt.get('type') - data_location = elt.get('location') - if data_location == 'system': - # User responsible for sys paths, security, etc - prepend = '' - elif data_location == 'archival': - prepend = os.path.join('..','..', - 'archival') - else: - prepend = os.path.join('..','..','data', - 'formatted') - if data_type == '' or data_type == None: - logging.warning(('No data type specified,' - ' assuming .csv')) - data_type = 'csv' - # Remove any data extension - if data_type == 'csv': - - data_path = convert_filename(elt, prepend, - 'csv') - self.data_path[run_name].append(data_path) - - elif data_type == 'txt': - data_path = convert_filename(elt, prepend, - 'txt') - self.data_path[run_name].append(data_path) - - elif data_type == 'sql': - data_path = convert_filename(elt, prepend, - 'sql') - self.data_path[run_name].append(data_path) - elif data_type == 'db': - data_path = convert_filename(elt, prepend, - 'db') - self.data_path[run_name].append(data_path) - else: - logging.error('Data type {!s} not yet handled; ' - 'not using this data.'.format( - data_type)) - - def required_params_present(self, req_params): - ''' Check if any required parameters missing from any runs. 
''' - - status = 1 - for run_name in self.params.keys(): - run_params = self.params[run_name] - if not set(req_params.keys()).issubset(set(run_params.keys())): - logging.error('In run {!s}, missing parameters {!s}'.format( - run_name, set(req_params.keys()).difference(set(run_params.keys())))) - status = 0 - return status - - def set_optional_params(self, opt_params, script_name): - ''' Set optional params with default values if params are missing''' - for run_name in self.params.keys(): - run_params = self.params[run_name] - for optpar in opt_params: - if not optpar in run_params: - logging.info("Default value for {!s} in {!s}: {!s}".format(optpar, - script_name, str(opt_params[optpar][1]))) - run_params[optpar] = opt_params[optpar][1] - - def eval_params(self): - ''' - Attempts to evaluate parameters to appropriate types. - - If eval() fails, parameter will stay a string, possibly leading to - cryptic errors later if there is a typo in a param value. - ''' - - for run_name in self.params.keys(): - for param_name in self.params[run_name].keys(): - try: - value = eval(self.params[run_name][param_name]) - self.params[run_name][param_name] = value - value_type = str(type(value)).split("'")[1] - logging.debug('In run %s, parameter %s evaluated to %s' % - (run_name, param_name, value_type)) - except: - logging.debug('In run %s, parameter %s left as string' % - (run_name, param_name)) - - -def make_map(data_paths, run_name, whole_globe=False): - ''' - Makes a map of all sites in run. - - Parameter - --------- - data_paths : list - Paths to data files (csv's). Data location will be extracted from - corresponding xml metadata file. - run_name : str - Name of run, used as name of map file. - whole_globe : bool - If True, map is entire globe. If False, map is "zoomed in" on data - locations. - - Returns - ------- - map_created : bool - True if file was created, False if a file already existed and none was - created. - - Notes - ----- - Map will be given the name of a run. If multiple runs have the same name, - only the map associated with the first run of that name will be saved. - - The label for each site will be the data file base name - (e.g., LBRI_2000.csv and LBRI.csv will be LBRI_2000 and LBRI respectively). - ''' - - # Check if Basemap present - if not, log and return - try: - from mpl_toolkits.basemap import Basemap - except: - logging.debug('Basemap module is not available, no map of data ' + - 'locations can be created') - return False - - # Set map_name - map_name = 'map_' + run_name + '.png' - - # TODO: Check if run_name is unique - # Check if map with this run_name already exists - if os.path.isfile(map_name): - logging.debug('Map with this run name already exists. New map ' + - 'overwriting old one.') - - # Get lat, long, and name of each data set - lats = [] - lons = [] - names = [] - - for path in data_paths: - temp = list(os.path.split(path)) - temp[1] = temp[1].split('.')[0] + '.xml' - x = os.path.join(temp[0], temp[1]) - - try: - meta = Metadata(x, {}) - bounds = meta.get_physical_coverage() - lats.append(bounds[0]) - lons.append(bounds[1]) - - fname, fext = os.path.splitext(os.path.split(path)[-1]) - names.append(fname) # First 4 letters of data set name - except: - logging.info('No location data found in %s, no map point ' - 'added.' 
% x) - - # If no valid location data, return without making a map - if len(names) == 0: - return False - - # Set up map - logging.debug('Creating map for run %s' % run_name) - if whole_globe: - m = Basemap(projection='cyl', resolution='i') - else: - # 10 degree buffer around min and max lat/long - m = Basemap(projection='cyl', lat_0=50, lon_0=-100, - llcrnrlon=min(lons)-10, llcrnrlat=min(lats)-10, - urcrnrlon=max(lons)+10, urcrnrlat=max(lats)+10, - resolution='l') - - # Draw features - m.bluemarble() - m.drawcoastlines() - m.drawcountries() - m.drawmapboundary() - - # Add sites - x, y = m(lons, lats) - m.plot(x, y, 'yo') - for n, xpt, ypt in zip(names,x,y): - if n == 'BCIS': ypt += 1 # Manual Cleanup for crowded areas - if n == 'SHER': ypt += 2 - plt.text(xpt+.5,ypt+.5,n,color='yellow') - - plt.savefig(map_name) - plt.close() - return True - - -def convert_filename(elt, prepend, ext): - '''Parses xml tree to return filename - - Parameters - ---------- - elt : Etree - XML tree object - prepend : str - String to be appended - ext : str - File type, i.e. csv, txt, sql, db - ''' - - file_name = elt.find('file').text.split('.')[0] - - directory = elt.find('directory').text - data_file = os.path.extsep.join((file_name, ext)) - data_path = os.path.join(prepend, directory, data_file) - return data_path From f2f831ea5dfb8e4792a9a179b5eb3d42c8d74c46 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 27 Mar 2014 15:03:39 -0700 Subject: [PATCH 169/343] Remove macroeco from doc requirements and macroeco.utils from setup --- doc/requirements.txt | 1 - setup.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/requirements.txt b/doc/requirements.txt index 4a38cd3..1ac4629 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -6,4 +6,3 @@ configparser decorator twiggy numpydoc -macroeco diff --git a/setup.py b/setup.py index 4cd08b7..af23c25 100644 --- a/setup.py +++ b/setup.py @@ -9,6 +9,6 @@ description = 'Analysis of ecological patterns in Python', author = 'Justin Kitzes, Mark Wilber, Chloe Lewis', url = 'https://github.com/jkitzes/macroeco', - packages = ['macroeco', 'macroeco.utils'], + packages = ['macroeco'], license = 'BSD', ) From cceedb2016a9b0252ad37832fd4d9e3412a07dd2 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 27 Mar 2014 15:33:56 -0700 Subject: [PATCH 170/343] Empirical cdc returns a data frame --- macroeco/empirical/empirical.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index a003779..b90639e 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -1297,7 +1297,7 @@ def _product(*args, **kwds): def empirical_cdf(data): """ - Generates an empirical cdf from empirical data + Generates an empirical cdf from data. Parameters ---------- @@ -1306,17 +1306,17 @@ def empirical_cdf(data): Returns -------- - : array - The empirical cdf corresponding to the inputted data + : DataFrame + Columns 'data' and 'ecdf'. 'data' contains ordered data and 'ecdf' + contains the corresponding ecdf values for the data. """ - # TODO: This should return sorted data also, otherwise trying to match the - # input data to output does not correspond (result is sorted, data is not - # necessarily). 
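    # In outline, the lines below build the new DataFrame return value:
    # value_counts() tallies each unique value, the cumulative sum over the
    # sorted unique values divided by n gives the ecdf at each value, and the
    # right join maps those probabilities back onto every observation, ordered
    # by value. Using the test case added in the following patch, the result
    # should look roughly like:
    #   empirical_cdf([1, 1, 1, 1, 2, 3, 4, 5, 6, 6])['ecdf'].values
    #   -> array([0.4, 0.4, 0.4, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0, 1.0])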
vals = pd.Series(data).value_counts() ecdf = pd.DataFrame(data).set_index(keys=0) probs = pd.DataFrame(vals.sort_index().cumsum() / np.float(len(data))) - ecdf = ecdf.join(probs) + ecdf = ecdf.join(probs, how="right") + ecdf = ecdf.reset_index() + ecdf.columns = ['data', 'ecdf'] - return np.array(ecdf[0]) + return ecdf From e33e726aba68932456b221fef7cbbae37eb92b2a Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 27 Mar 2014 15:34:21 -0700 Subject: [PATCH 171/343] Commented out old unit tests --- macroeco/empirical/xest_empirical.py | 1164 +++++++++++++------------- 1 file changed, 592 insertions(+), 572 deletions(-) diff --git a/macroeco/empirical/xest_empirical.py b/macroeco/empirical/xest_empirical.py index c1fd442..76f171d 100644 --- a/macroeco/empirical/xest_empirical.py +++ b/macroeco/empirical/xest_empirical.py @@ -3,576 +3,596 @@ ''' from __future__ import division -import unittest -import os -gcwd = os.getcwd -pd = os.path.dirname -jp = os.path.join -from empirical import * -import numpy as np -class TestPatch(unittest.TestCase): - - def setUp(self): - self.xyfile5 = open('xyfile5.csv','w') - self.xyfile5.write('''spp_code, x, y, count -grt, .1, .1, 2 -grt, .1, .2, 1 -grt, .1, .3, 1 -rty, .1, .2, 1 -rty, .2, .3, 2''') - self.xyfile5.close() - self.xymeta5 = {('x', 'maximum'): .2, ('x', 'minimum'): .1, ('x', - 'precision'): .1, ('x', 'type'): 'interval', ('y', 'maximum'): .3, - ('y', 'minimum'): .1, ('y', 'precision'): .1, ('y', 'type'): 'interval', - ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, - ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', - ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', - 'precision'): None, ('count', 'type'): 'ratio'} - - self.pat1 = Patch('xyfile5.csv') - # Line below sets metadata manually-no metadata file loaded - self.pat1.data_table.meta = self.xymeta5 - - self.xyfile6 = open('xyfile6.csv', 'w') - self.xyfile6.write('''spp_code, x, y, count -a, 0, 0, 1 -b, 0, 0, 1 -c, 0, 0, 0 -d, 0, 0, 3 -a, 0, 1, 0 -b, 0, 1, 4 -c, 0, 1, 0 -d, 0, 1, 1 -a, 1, 0, 1 -b, 1, 0, 0 -c, 1, 0, 3 -d, 1, 0, 1 -a, 1, 1, 0 -b, 1, 1, 1 -c, 1, 1, 3 -d, 1, 1, 1''') - self.xyfile6.close() - self.xymeta6 = {('x', 'maximum'): 1, ('x', 'minimum'): 0, ('x', - 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 1, - ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', - ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, - ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', - ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', - 'precision'): None, ('count', 'type'): 'ratio'} - self.pat2 = Patch('xyfile6.csv') - self.pat2.data_table.meta = self.xymeta6 - - self.xyfile7 = open('xyfile7.csv', 'w') - self.xyfile7.write('''spp_code, x, y, count -tery, 1, 1, 1 -1, 1, 1, 1 -2, 1, 1, 0 -3, 1, 1, 3 -0, 1, 2, 0 -1, 1, 2, 4 -2, 1, 2, 0 -tery, 1, 2, 1 -0, 2, 1, 1 -1, 2, 1, 0 -2, 2, 1, 3 -3, 2, 1, 1 -tery, 2, 2, 0 -1, 2, 2, 1 -2, 2, 2, 3 -3, 2, 2, 1''') - self.xyfile7.close() - self.xymeta7 = {('x', 'maximum'): 2, ('x', 'minimum'): 1, ('x', - 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 2, - ('y', 'minimum'): 1, ('y', 'precision'): 1, ('y', 'type'): 'interval', - ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, - ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', - ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', - 'precision'): None, ('count', 'type'): 'ratio'} - self.pat3 = Patch('xyfile7.csv') - 
self.pat3.data_table.meta = self.xymeta7 - - self.xyfile8 = open('xyfile8.csv', 'w') - self.xyfile8.write('''spp_code, x, y, count -0, 0, 0, 1 -1, 0, 0, 1 -2, 0, 0, 0 -3, 0, 0, 3 -0, 0, 1, 0 -1, 0, 1, 4 -2, 0, 1, 0 -3, 0, 1, 1 -0, 1, 0, 1 -1, 1, 0, 0 -2, 1, 0, 3 -3, 1, 0, 1 -0, 1, 1, 0 -1, 1, 1, 1 -2, 1, 1, 3 -3, 1, 1, 1 -0, 2, 0, 0 -1, 2, 0, 0 -2, 2, 0, 2 -3, 2, 0, 4 -0, 2, 1, 0 -1, 2, 1, 0 -2, 2, 1, 0 -3, 2, 1, 1''') - self.xyfile8.close() - self.xymeta8 = {('x', 'maximum'): 2, ('x', 'minimum'): 0, ('x', - 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 1, - ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', - ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, - ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', - ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', - 'precision'): None, ('count', 'type'): 'ratio'} - self.pat4 = Patch('xyfile8.csv') - self.pat4.data_table.meta = self.xymeta8 - self.xyfile9 = open('xyfile9.csv','w') - self.xyfile9.write('''spp_code, x, y, count, energy, mass -grt, .1, .1, 2, 1, 34 -grt, .1, .2, 1, 2, 12 -grt, .1, .3, 1, 3, 23 -rty, .1, .2, 1, 4, 45 -rty, .2, .3, 1, 5, 110''') - self.xyfile9.close() - self.xymeta9 = {('x', 'maximum'): .2, ('x', 'minimum'): .1, ('x', - 'precision'): .1, ('x', 'type'): 'interval', ('y', 'maximum'): .3, - ('y', 'minimum'): .1, ('y', 'precision'): .1, ('y', 'type'): 'interval', - ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, - ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', - ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', - 'precision'): None, ('count', 'type'): 'ratio'} - - self.pat5 = Patch('xyfile9.csv') - self.pat5.data_table.meta = self.xymeta9 - self.xyfile10 = open('xyfile10.csv', 'w') - self.xyfile10.write('''spp_code, x, y, count -a, 0, 0, 1 -b, 0, 0, 1 -d, 0, 0, 3 -b, 0, 1, 4 -d, 0, 1, 1 -a, 1, 0, 1 -c, 1, 0, 3 -d, 1, 0, 1 -b, 1, 1, 1 -c, 1, 1, 3 -d, 1, 1, 1''') - self.xyfile10.close() - self.xymeta10 = {('x', 'maximum'): 1, ('x', 'minimum'): 0, ('x', - 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 1, - ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', - ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, - ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', - ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', - 'precision'): None, ('count', 'type'): 'ratio'} - self.pat6 = Patch('xyfile10.csv') - self.pat6.data_table.meta = self.xymeta10 - self.xyfile11 = open('xyfile11.csv', 'w') - self.xyfile11.write('''spp_code, x, y, count, reptile -a, 0, 0, 1, lizard -b, 0, 0, 1, lizard -d, 0, 0, 3, snake -b, 0, 1, 4, lizard -d, 0, 1, 1, turtle -a, 1, 0, 1, snake -c, 1, 0, 3, lizard -d, 1, 0, 1, snake -b, 1, 1, 1, tuatara -c, 1, 1, 3, turtle -d, 1, 1, 1, snake''') - self.xyfile11.close() - self.xymeta11 = {('x', 'maximum'): 1, ('x', 'minimum'): 0, ('x', - 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 1, - ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', - ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, - ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', - ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', - 'precision'): None, ('count', 'type'): 'ratio', ('reptile', 'maximum') - : None, ('reptile', 'minimum') : None, ('reptile', 'precision'):None, - ('reptile', 'type') : 'ordinal'} - self.pat7 = Patch('xyfile11.csv') - self.pat7.data_table.meta = 
self.xymeta11 - - self.xyfile12 = open('xyfile12.csv', 'w') - self.xyfile12.write('''spp_code, x, y, count -3, 0, 0, 3 -3, 0, 1, 1 -2, 0, 2, 3 -1, 0, 3, 8 -3, 1, 0, 1 -3, 1, 1, 1 -0, 1, 2, 5 -3, 1, 3, 1 -2, 2, 0, 1 -1, 2, 1, 3 -1, 2, 2, 6 -0, 2, 3, 1 -1, 3, 0, 9 -2, 3, 1, 1 -0, 3, 2, 3 -3, 3, 3, 1''') - self.xyfile12.close() - self.xymeta12 = {('x', 'maximum'): 3, ('x', 'minimum'): 0, ('x', - 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 3, - ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', - ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, - ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', - ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', - 'precision'): None, ('count', 'type'): 'ratio'} - self.pat8 = Patch('xyfile12.csv') - self.pat8.data_table.meta = self.xymeta12 - - # Data file with three count colums, unique row for each species - self.xyfile13 = open('xyfile13.csv', 'w') - self.xyfile13.write('''spp_code, order, plot1, plot2, plot3 -a, pred, 0, 0, 0 -b, pred, 0, 0, 1 -c, pred, 0, 1, 0 -d, pred, 0, 2, 3 -e, scav, 0, 1, 0 -f, scav, 0, 1, 4''') - self.xyfile13.close() - self.xymeta13 = {('spp_code', 'maximum'): None, - ('spp_code', 'minimum'): None, - ('spp_code', 'precision'): None, - ('spp_code', 'type'): 'ordinal', - ('order', 'maximum'): None, - ('order', 'minimum'): None, - ('order', 'precision'): None, - ('order', 'type'): 'ordinal', - ('plot1', 'maximum'): None, - ('plot1', 'minimum'): None, - ('plot1', 'precision'): None, - ('plot1', 'type'): 'ratio', - ('plot2', 'maximum'): None, - ('plot2', 'minimum'): None, - ('plot2', 'precision'): None, - ('plot2', 'type'): 'ratio', - ('plot3', 'maximum'): None, - ('plot3', 'minimum'): None, - ('plot3', 'precision'): None, - ('plot3', 'type'): 'ratio'} - self.pat9 = Patch('xyfile13.csv') - self.pat9.data_table.meta = self.xymeta13 - - - - - def tearDown(self): - os.remove('xyfile5.csv') - os.remove('xyfile6.csv') - os.remove('xyfile7.csv') - os.remove('xyfile8.csv') - os.remove('xyfile9.csv') - os.remove('xyfile10.csv') - os.remove('xyfile11.csv') - os.remove('xyfile12.csv') - os.remove('xyfile13.csv') - - # - # init and set_attributes - # - - def test_patch_init(self): - - # Test entire table is loaded - self.assertTrue(len(self.pat1.data_table.table) == 5) - self.assertTrue(len(self.pat2.data_table.table) == 16) - - # Test that subsetting works - pat = Patch('xyfile6.csv', {'spp_code': [('!=','a'), ('!=', 'b'), - ('!=','c')]}) - self.assertTrue(np.all(pat.data_table.table['spp_code'] == 'd')) - pat = Patch('xyfile7.csv', {'spp_code': ('==', "tery")}) - self.assertTrue(sum(pat.data_table.table['count']) == 2) - - # Testing that metadata was set correctly - self.assertTrue(self.pat1.data_table.meta[('x', 'maximum')] == .2) - - def test_sad(self): - - # Test correct result with 'whole' and one division - sad = self.pat1.sad({'spp_code': 'species', 'count': 'count', - 'x': 1}) - self.assertTrue(np.array_equal(sad[0][1], np.array([4,3]))) - sad = self.pat1.sad({'spp_code': 'species', 'count': 'count', - 'x': 'whole'}) - self.assertTrue(np.array_equal(sad[0][1], np.array([4,3]))) - sad = self.pat4.sad({'spp_code': 'species', 'count' :'count', 'x': 1}) - self.assertTrue(np.array_equal(sad[0][2], np.array([0,1,2,3]))) - - # Test correct result with other divisions - sad = self.pat4.sad({'spp_code': 'species', 'count': 'count', 'x': 3, - 'y': 2}) - self.assertTrue(np.array_equal(sad[-1][1], np.array([0,0,0,1]))) - - # Test that 'whole' and ignore give the same 
result - sad1 = self.pat4.sad({'spp_code': 'species', 'count': 'count'}) - sad2 = self.pat4.sad({'spp_code': 'species', 'count': 'count', 'x' : - 'whole'}) - self.assertTrue(np.array_equal(sad1[0][1], sad2[0][1])) - - # Test that 'split' keyword returns the correct results - sad = self.pat5.sad({'spp_code' :'species', 'energy':'split', 'count' - : 'count'}) - self.assertTrue(len(sad) == 5) - self.assertTrue(np.array_equal(sad[0][1], np.array([2,0]))) - - # Test split and clean on numeric column - sad = self.pat5.sad({'spp_code' :'species', 'energy':'split', 'count' - : 'count'}, clean=True) - self.assertTrue(len(sad) == 5) - self.assertTrue(np.array_equal(sad[0][1], np.array([2]))) - - # Test that cleaning sad and split works on string - sad = self.pat7.sad({'spp_code' : 'species', 'count' : 'count', - 'reptile' : 'split'}, clean=True) - self.assertTrue(len(sad) == 4) - self.assertTrue(np.array_equal(sad[0][1], np.array([1,5,3]))) - self.assertTrue(np.array_equal(sad[2][1], np.array([1]))) - self.assertTrue(sad[2][2][0] == 'b') - - def test_parse_criteria(self): - - # Checking parse returns what we would expect - pars = self.pat4.parse_criteria({'spp_code': 'species', 'count': 'count', - 'x': 1}) - self.assertTrue(pars[1] == 'spp_code') - self.assertTrue(pars[2] == 'count') - - # Test that energy, mass and count col are None - pars = self.pat4.parse_criteria({'spp_code': 'species', - 'y': 'whole'}) - self.assertTrue((pars[2] == None) and (pars[3] == None) and (pars[4] == - None)) - - # If species is not specified correctly an error is thrown - self.assertRaises(ValueError, self.pat3.parse_criteria, {'spp_col' - :'species'}) - # Make sure if count is not passed, no error is thrown - self.pat3.parse_criteria({'spp_code': 'species'}) - - # Check energy and mass returns - pars = self.pat5.parse_criteria({'spp_code': 'species', 'count': - 'count', 'energy': 'energy'}) - - self.assertTrue(pars[3] == 'energy') - self.assertTrue(pars[4] == None) - - # Check that combinations in empty dict if no criteria given - pars = self.pat5.parse_criteria({'spp_code': 'species', 'count': - 'count'}) - self.assertTrue(pars[5] == [{}]) - - # TODO: Test that error is thrown if step < prec - - def test_sar(self): - - # Checking that sar function returns correct S0 for full plot - sar = self.pat3.sar(('x', 'y'), [(1,1)], {'spp_code': 'species', - 'count': 'count'}) - self.assertTrue(sar[0]['items'][0] == 5) - - # Checking for correct result for sar - sar = self.pat3.sar(('x', 'y'), [(1,1), (2,2)], {'spp_code': 'species', - 'count': 'count'}) - self.assertTrue(np.array_equal(sar[1][1], np.array([3,3,2,3]))) - sar = self.pat4.sar(('x', 'y'), [(1,1), (1,2), (3,2)], {'spp_code': - 'species', 'count': 'count'}, form='sar') - self.assertTrue(np.array_equal(sar[1][2], np.array([3,3,2,2,3,1]))) - - # Checking for correct result for ear - ear = self.pat3.sar(('x', 'y'), [(1,1), (2,2)], {'spp_code': 'species', - 'count': 'count'}, form='ear') - self.assertTrue(np.array_equal(ear[1][1], np.array([0,1,0,0]))) - - # Test that returned areas are correct - sar = self.pat1.sar(('x', 'y'), [(1,1)], {'spp_code': 'species', - 'count': 'count'}) - self.assertTrue(np.round(sar[0]['area'][0], decimals=2) == 0.06) - self.assertTrue(sar[0]['items'][0] == 2) - - def test_universal_sar(self): - - # Check that it returns the right length - criteria = {'spp_code': 'species', 'count' : 'count'} - div_cols = ('x', 'y') - vals = self.pat8.universal_sar(div_cols, [(1,1), (1,2), (2,2), (2,4), - (4,4)], criteria) - self.assertTrue(len(vals) == 3) - 
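# (The expected length of 3 appears to follow from universal_sar working on
# consecutive (A, A/2, A/4) triplets of the supplied divisions, so five
# nested divisions yield three triplets; the assertions below are consistent
# with that reading.)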
- # If (1,1) is not passed in it should have a length of zero - vals = self.pat8.universal_sar(div_cols, [(1,2), (2,2)], criteria) - self.assertTrue(len(vals) == 0) - - # If (1,1) is not passed in but include_full == True should have len - # equal to 1 - vals = self.pat8.universal_sar(div_cols, [(1,2), (2,2), (2,4)], - criteria, - include_full=True) - self.assertTrue(len(vals) == 2) - - # Test that I get the correct z-value back - vals = self.pat8.universal_sar(div_cols, [(1,1), (1,2), (2,2)], - criteria) - self.assertTrue(np.round(vals['z'][0], decimals=4) == 0.3390) - - # If I pass in something other than a halving I should still get - # something back - vals = self.pat8.universal_sar(div_cols, [(1,1), (2,2), (2,4), (4,4)], - criteria) - self.assertTrue(len(vals) == 2) - - def test_comm_sep(self): - - # Create result recarray - comm = self.pat9.comm_sep({'plot1': (0,0), 'plot2': (0,1), - 'plot3': (3,4)}, - {'spp_code': 'species', 'count': 'count'}) - - # Create result recarray with dec degree locs - comm_decdeg = self.pat9.comm_sep({'plot1': (9.1,79.0), - 'plot2': (9.2,79.5), 'plot3': (12.7,50)}, - {'spp_code': 'species', 'count': 'count'}, - loc_unit='decdeg') - - # Check distances - dist_sort = np.sort(comm['dist']) - np.testing.assert_array_almost_equal(dist_sort, np.array((1,4.242,5)), - 3) - - # Check distances dec degree - # TODO: Find exact third party comparison formula - formulas online use - # different radii, etc. and give approx same answer - dist_sort = np.sort(comm_decdeg['dist']) - #np.testing.assert_array_almost_equal(dist_sort, - # np.array((56.058,3193.507, - # 3245.820)), 3) - - # Check species in each plot - spp_sort = np.sort(np.array(list(comm['spp-a']) + list(comm['spp-b']))) - np.testing.assert_array_equal(spp_sort, np.array((0,0,3,3,4,4))) - - # Check Sorensen - 2 zeros from empty plot1 - sor_sort = np.sort(comm['sorensen']) - np.testing.assert_array_almost_equal(sor_sort, - np.array((0,0,0.571428571)), 5) - - # Check Jaccard - 2 zeros from empty plot1 - jac_sort = np.sort(comm['jaccard']) - np.testing.assert_array_almost_equal(jac_sort, np.array((0,0,0.4)), 5) - - def test_o_ring(self): - - # Check standard case, no min max, no edge correction, no criteria - # Tests that distances and repeats for count col are correct - result_list = self.pat1.o_ring(('x','y'), [0,.11,.2], - {'spp_code': 'species', 'count': 'count'}) - - np.testing.assert_array_equal(result_list[0][2][0], np.array((8,4))) - np.testing.assert_array_equal(result_list[0][2][1], np.array((2,4))) - - # Check standard case, no min max, no edge correction, with division - result_list = self.pat1.o_ring(('x','y'), [0,.11,.2], - {'spp_code': 'species', 'count': 'count', - 'y': 2}) - - # - First half of y, both species - np.testing.assert_array_equal(result_list[0][2][0], np.array((6,0))) - np.testing.assert_array_equal(result_list[0][2][1], np.array((0,0))) - - # - Second half of y, both species - np.testing.assert_array_equal(result_list[1][2][0], np.array((0,0))) - np.testing.assert_array_equal(result_list[1][2][1], np.array((2,0))) - - # Check edge correction - check only first species - # Almost equal required due to float division - result_list = self.pat1.o_ring(('x','y'), [0,.05,.1], - {'spp_code': 'species', 'count': 'count'}, - edge_correct=True) - np.testing.assert_array_almost_equal(result_list[0][2][0], - np.array((8,18))) - - # Check density - check only second species - print 'here ' - result_list = self.pat1.o_ring(('x','y'), [0,.05,.1], - {'spp_code': 'species', 'count': 'count'}, - 
density=True) - np.testing.assert_array_almost_equal(result_list[0][2][1], - np.array((1358.12218105,0))) - - def test_ssad(self): - - # Check that ssad does not lose any individuals - ssad = self.pat2.ssad({'spp_code': 'species', 'count': 'count'}) - sad = self.pat2.sad({'spp_code': 'species', 'count': 'count'}) - sum_ssad = np.array([sum(val) for val in ssad[1].itervalues()]) - self.assertTrue(sum(sad[0][1]) == sum(sum_ssad)) - - ssad = self.pat6.ssad({'spp_code': 'species', 'count': 'count'}) - sad = self.pat6.sad({'spp_code': 'species', 'count': 'count'}) - sum_ssad = np.array([sum(val) for val in ssad[1].itervalues()]) - self.assertTrue(sum(sad[0][1]) == sum(sum_ssad)) - - # Manual checks of correct ssad - ssad = self.pat2.ssad({'spp_code': 'species', 'count': 'count', 'x': - 2, 'y': 2}) - self.assertTrue(set(ssad[1]['a']) == {1, 0, 1, 0}) - self.assertTrue(set(ssad[1]['b']) == {1, 4, 0, 1}) - self.assertTrue(set(ssad[1]['c']) == {0, 0, 3, 3}) - self.assertTrue(set(ssad[1]['d']) == {3, 1, 1, 1}) - - ssad = self.pat6.ssad({'spp_code': 'species', 'count': 'count', 'x' : - 2, 'y': 2}) - self.assertTrue(set(ssad[1]['a']) == {1, 0, 1, 0}) - self.assertTrue(set(ssad[1]['b']) == {1, 4, 0, 1}) - self.assertTrue(set(ssad[1]['c']) == {0, 0, 3, 3}) - self.assertTrue(set(ssad[1]['d']) == {3, 1, 1, 1}) - - def test_ied(self): - - # Test correct length of result - eng = self.pat5.ied({'spp_code': 'species', 'count': 'count', - 'energy': 'energy'}) - self.assertTrue(len(eng[0][1]) == 6) - - # Test error if energy column is missing - self.assertRaises(ValueError, self.pat5.ied, - {'spp_code': 'species', 'count': 'count'}) - - # Test normalize is working - eng = self.pat5.ied({'spp_code': 'species', 'count': 'count', - 'energy': 'energy', 'x': 2}) - self.assertTrue(np.array_equal(eng[1][1], np.array([1]))) - self.assertTrue(len(eng[0][1]) == 5) - - # Test mass column and normalize - eng = self.pat5.ied({'spp_code': 'species', 'count': 'count', - 'mass' : 'mass'}, exponent=1, normalize=False) - self.assertTrue(np.array_equal(eng[0][1], np.array([17,17,12,23,45, - 110]))) - - # Test that energy overrides mass - eng = self.pat5.ied({'spp_code': 'species', 'count': 'count', - 'mass' : 'mass', 'energy' : 'energy'}, normalize=False) - self.assertTrue(np.array_equal(eng[0][1], np.array([.5,.5,2,3,4,5]))) - - def test_sed(self): - - # Check correct result - eng = self.pat5.sed({'spp_code': 'species', 'count': 'count', - 'energy': 'energy'}) - self.assertTrue(np.array_equal(eng[0][1]['grt'], - np.array([1,1,4,6]))) - self.assertTrue(np.array_equal(eng[0][1]['rty'], - np.array([8,10]))) - - eng = self.pat5.sed({'spp_code': 'species', 'count': 'count', - 'energy': 'energy', 'x': 2}) - self.assertTrue(np.array_equal(eng[1][1]['rty'], np.array([1]))) - self.assertTrue(len(eng[1][1]) == 2) - -if __name__ == "__main__": - unittest.main() +from numpy.testing import (TestCase, assert_equal, assert_array_equal, + assert_almost_equal, assert_array_almost_equal, + assert_allclose, assert_, assert_raises) + +from macroeco.empirical import * +import numpy as np +import scipy.stats as stats +import numpy.testing as nt + +class TestEmpiricalCDF(TestCase): + """ Unittests for Empirical cdf """ + + def test_empirical_cdf(self): + + #Test against R's ecdf function + + # Test Case 1 + test_data = [1, 1, 1, 1, 2, 3, 4, 5, 6, 6] + R_res = [.4, .4, .4, .4, .5, .6, .7, .8, 1, 1] + res = empirical_cdf(test_data) + assert_array_equal(R_res, res) + + # Test Case 2 + test_data = [3, 3, 3, 3] + R_res = [1, 1, 1, 1] + res = 
empirical_cdf(test_data) + assert_array_equal(R_res, res) +# class TestPatch(unittest.TestCase): + +# def setUp(self): +# self.xyfile5 = open('xyfile5.csv','w') +# self.xyfile5.write('''spp_code, x, y, count +# grt, .1, .1, 2 +# grt, .1, .2, 1 +# grt, .1, .3, 1 +# rty, .1, .2, 1 +# rty, .2, .3, 2''') +# self.xyfile5.close() +# self.xymeta5 = {('x', 'maximum'): .2, ('x', 'minimum'): .1, ('x', +# 'precision'): .1, ('x', 'type'): 'interval', ('y', 'maximum'): .3, +# ('y', 'minimum'): .1, ('y', 'precision'): .1, ('y', 'type'): 'interval', +# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, +# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', +# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', +# 'precision'): None, ('count', 'type'): 'ratio'} + +# self.pat1 = Patch('xyfile5.csv') +# # Line below sets metadata manually-no metadata file loaded +# self.pat1.data_table.meta = self.xymeta5 + +# self.xyfile6 = open('xyfile6.csv', 'w') +# self.xyfile6.write('''spp_code, x, y, count +# a, 0, 0, 1 +# b, 0, 0, 1 +# c, 0, 0, 0 +# d, 0, 0, 3 +# a, 0, 1, 0 +# b, 0, 1, 4 +# c, 0, 1, 0 +# d, 0, 1, 1 +# a, 1, 0, 1 +# b, 1, 0, 0 +# c, 1, 0, 3 +# d, 1, 0, 1 +# a, 1, 1, 0 +# b, 1, 1, 1 +# c, 1, 1, 3 +# d, 1, 1, 1''') +# self.xyfile6.close() +# self.xymeta6 = {('x', 'maximum'): 1, ('x', 'minimum'): 0, ('x', +# 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 1, +# ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', +# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, +# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', +# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', +# 'precision'): None, ('count', 'type'): 'ratio'} +# self.pat2 = Patch('xyfile6.csv') +# self.pat2.data_table.meta = self.xymeta6 + +# self.xyfile7 = open('xyfile7.csv', 'w') +# self.xyfile7.write('''spp_code, x, y, count +# tery, 1, 1, 1 +# 1, 1, 1, 1 +# 2, 1, 1, 0 +# 3, 1, 1, 3 +# 0, 1, 2, 0 +# 1, 1, 2, 4 +# 2, 1, 2, 0 +# tery, 1, 2, 1 +# 0, 2, 1, 1 +# 1, 2, 1, 0 +# 2, 2, 1, 3 +# 3, 2, 1, 1 +# tery, 2, 2, 0 +# 1, 2, 2, 1 +# 2, 2, 2, 3 +# 3, 2, 2, 1''') +# self.xyfile7.close() +# self.xymeta7 = {('x', 'maximum'): 2, ('x', 'minimum'): 1, ('x', +# 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 2, +# ('y', 'minimum'): 1, ('y', 'precision'): 1, ('y', 'type'): 'interval', +# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, +# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', +# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', +# 'precision'): None, ('count', 'type'): 'ratio'} +# self.pat3 = Patch('xyfile7.csv') +# self.pat3.data_table.meta = self.xymeta7 + +# self.xyfile8 = open('xyfile8.csv', 'w') +# self.xyfile8.write('''spp_code, x, y, count +# 0, 0, 0, 1 +# 1, 0, 0, 1 +# 2, 0, 0, 0 +# 3, 0, 0, 3 +# 0, 0, 1, 0 +# 1, 0, 1, 4 +# 2, 0, 1, 0 +# 3, 0, 1, 1 +# 0, 1, 0, 1 +# 1, 1, 0, 0 +# 2, 1, 0, 3 +# 3, 1, 0, 1 +# 0, 1, 1, 0 +# 1, 1, 1, 1 +# 2, 1, 1, 3 +# 3, 1, 1, 1 +# 0, 2, 0, 0 +# 1, 2, 0, 0 +# 2, 2, 0, 2 +# 3, 2, 0, 4 +# 0, 2, 1, 0 +# 1, 2, 1, 0 +# 2, 2, 1, 0 +# 3, 2, 1, 1''') +# self.xyfile8.close() +# self.xymeta8 = {('x', 'maximum'): 2, ('x', 'minimum'): 0, ('x', +# 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 1, +# ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', +# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, +# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', +# ('count', 'maximum'): None, 
('count', 'minimum'): None, ('count', +# 'precision'): None, ('count', 'type'): 'ratio'} +# self.pat4 = Patch('xyfile8.csv') +# self.pat4.data_table.meta = self.xymeta8 +# self.xyfile9 = open('xyfile9.csv','w') +# self.xyfile9.write('''spp_code, x, y, count, energy, mass +# grt, .1, .1, 2, 1, 34 +# grt, .1, .2, 1, 2, 12 +# grt, .1, .3, 1, 3, 23 +# rty, .1, .2, 1, 4, 45 +# rty, .2, .3, 1, 5, 110''') +# self.xyfile9.close() +# self.xymeta9 = {('x', 'maximum'): .2, ('x', 'minimum'): .1, ('x', +# 'precision'): .1, ('x', 'type'): 'interval', ('y', 'maximum'): .3, +# ('y', 'minimum'): .1, ('y', 'precision'): .1, ('y', 'type'): 'interval', +# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, +# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', +# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', +# 'precision'): None, ('count', 'type'): 'ratio'} + +# self.pat5 = Patch('xyfile9.csv') +# self.pat5.data_table.meta = self.xymeta9 +# self.xyfile10 = open('xyfile10.csv', 'w') +# self.xyfile10.write('''spp_code, x, y, count +# a, 0, 0, 1 +# b, 0, 0, 1 +# d, 0, 0, 3 +# b, 0, 1, 4 +# d, 0, 1, 1 +# a, 1, 0, 1 +# c, 1, 0, 3 +# d, 1, 0, 1 +# b, 1, 1, 1 +# c, 1, 1, 3 +# d, 1, 1, 1''') +# self.xyfile10.close() +# self.xymeta10 = {('x', 'maximum'): 1, ('x', 'minimum'): 0, ('x', +# 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 1, +# ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', +# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, +# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', +# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', +# 'precision'): None, ('count', 'type'): 'ratio'} +# self.pat6 = Patch('xyfile10.csv') +# self.pat6.data_table.meta = self.xymeta10 +# self.xyfile11 = open('xyfile11.csv', 'w') +# self.xyfile11.write('''spp_code, x, y, count, reptile +# a, 0, 0, 1, lizard +# b, 0, 0, 1, lizard +# d, 0, 0, 3, snake +# b, 0, 1, 4, lizard +# d, 0, 1, 1, turtle +# a, 1, 0, 1, snake +# c, 1, 0, 3, lizard +# d, 1, 0, 1, snake +# b, 1, 1, 1, tuatara +# c, 1, 1, 3, turtle +# d, 1, 1, 1, snake''') +# self.xyfile11.close() +# self.xymeta11 = {('x', 'maximum'): 1, ('x', 'minimum'): 0, ('x', +# 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 1, +# ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', +# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, +# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', +# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', +# 'precision'): None, ('count', 'type'): 'ratio', ('reptile', 'maximum') +# : None, ('reptile', 'minimum') : None, ('reptile', 'precision'):None, +# ('reptile', 'type') : 'ordinal'} +# self.pat7 = Patch('xyfile11.csv') +# self.pat7.data_table.meta = self.xymeta11 + +# self.xyfile12 = open('xyfile12.csv', 'w') +# self.xyfile12.write('''spp_code, x, y, count +# 3, 0, 0, 3 +# 3, 0, 1, 1 +# 2, 0, 2, 3 +# 1, 0, 3, 8 +# 3, 1, 0, 1 +# 3, 1, 1, 1 +# 0, 1, 2, 5 +# 3, 1, 3, 1 +# 2, 2, 0, 1 +# 1, 2, 1, 3 +# 1, 2, 2, 6 +# 0, 2, 3, 1 +# 1, 3, 0, 9 +# 2, 3, 1, 1 +# 0, 3, 2, 3 +# 3, 3, 3, 1''') +# self.xyfile12.close() +# self.xymeta12 = {('x', 'maximum'): 3, ('x', 'minimum'): 0, ('x', +# 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 3, +# ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', +# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, +# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', +# ('count', 
'maximum'): None, ('count', 'minimum'): None, ('count', +# 'precision'): None, ('count', 'type'): 'ratio'} +# self.pat8 = Patch('xyfile12.csv') +# self.pat8.data_table.meta = self.xymeta12 + +# # Data file with three count colums, unique row for each species +# self.xyfile13 = open('xyfile13.csv', 'w') +# self.xyfile13.write('''spp_code, order, plot1, plot2, plot3 +# a, pred, 0, 0, 0 +# b, pred, 0, 0, 1 +# c, pred, 0, 1, 0 +# d, pred, 0, 2, 3 +# e, scav, 0, 1, 0 +# f, scav, 0, 1, 4''') +# self.xyfile13.close() +# self.xymeta13 = {('spp_code', 'maximum'): None, +# ('spp_code', 'minimum'): None, +# ('spp_code', 'precision'): None, +# ('spp_code', 'type'): 'ordinal', +# ('order', 'maximum'): None, +# ('order', 'minimum'): None, +# ('order', 'precision'): None, +# ('order', 'type'): 'ordinal', +# ('plot1', 'maximum'): None, +# ('plot1', 'minimum'): None, +# ('plot1', 'precision'): None, +# ('plot1', 'type'): 'ratio', +# ('plot2', 'maximum'): None, +# ('plot2', 'minimum'): None, +# ('plot2', 'precision'): None, +# ('plot2', 'type'): 'ratio', +# ('plot3', 'maximum'): None, +# ('plot3', 'minimum'): None, +# ('plot3', 'precision'): None, +# ('plot3', 'type'): 'ratio'} +# self.pat9 = Patch('xyfile13.csv') +# self.pat9.data_table.meta = self.xymeta13 + + + + +# def tearDown(self): +# os.remove('xyfile5.csv') +# os.remove('xyfile6.csv') +# os.remove('xyfile7.csv') +# os.remove('xyfile8.csv') +# os.remove('xyfile9.csv') +# os.remove('xyfile10.csv') +# os.remove('xyfile11.csv') +# os.remove('xyfile12.csv') +# os.remove('xyfile13.csv') + +# # +# # init and set_attributes +# # + +# def test_patch_init(self): + +# # Test entire table is loaded +# self.assertTrue(len(self.pat1.data_table.table) == 5) +# self.assertTrue(len(self.pat2.data_table.table) == 16) + +# # Test that subsetting works +# pat = Patch('xyfile6.csv', {'spp_code': [('!=','a'), ('!=', 'b'), +# ('!=','c')]}) +# self.assertTrue(np.all(pat.data_table.table['spp_code'] == 'd')) +# pat = Patch('xyfile7.csv', {'spp_code': ('==', "tery")}) +# self.assertTrue(sum(pat.data_table.table['count']) == 2) + +# # Testing that metadata was set correctly +# self.assertTrue(self.pat1.data_table.meta[('x', 'maximum')] == .2) + +# def test_sad(self): + +# # Test correct result with 'whole' and one division +# sad = self.pat1.sad({'spp_code': 'species', 'count': 'count', +# 'x': 1}) +# self.assertTrue(np.array_equal(sad[0][1], np.array([4,3]))) +# sad = self.pat1.sad({'spp_code': 'species', 'count': 'count', +# 'x': 'whole'}) +# self.assertTrue(np.array_equal(sad[0][1], np.array([4,3]))) +# sad = self.pat4.sad({'spp_code': 'species', 'count' :'count', 'x': 1}) +# self.assertTrue(np.array_equal(sad[0][2], np.array([0,1,2,3]))) + +# # Test correct result with other divisions +# sad = self.pat4.sad({'spp_code': 'species', 'count': 'count', 'x': 3, +# 'y': 2}) +# self.assertTrue(np.array_equal(sad[-1][1], np.array([0,0,0,1]))) + +# # Test that 'whole' and ignore give the same result +# sad1 = self.pat4.sad({'spp_code': 'species', 'count': 'count'}) +# sad2 = self.pat4.sad({'spp_code': 'species', 'count': 'count', 'x' : +# 'whole'}) +# self.assertTrue(np.array_equal(sad1[0][1], sad2[0][1])) + +# # Test that 'split' keyword returns the correct results +# sad = self.pat5.sad({'spp_code' :'species', 'energy':'split', 'count' +# : 'count'}) +# self.assertTrue(len(sad) == 5) +# self.assertTrue(np.array_equal(sad[0][1], np.array([2,0]))) + +# # Test split and clean on numeric column +# sad = self.pat5.sad({'spp_code' :'species', 'energy':'split', 'count' +# : 'count'}, 
clean=True) +# self.assertTrue(len(sad) == 5) +# self.assertTrue(np.array_equal(sad[0][1], np.array([2]))) + +# # Test that cleaning sad and split works on string +# sad = self.pat7.sad({'spp_code' : 'species', 'count' : 'count', +# 'reptile' : 'split'}, clean=True) +# self.assertTrue(len(sad) == 4) +# self.assertTrue(np.array_equal(sad[0][1], np.array([1,5,3]))) +# self.assertTrue(np.array_equal(sad[2][1], np.array([1]))) +# self.assertTrue(sad[2][2][0] == 'b') + +# def test_parse_criteria(self): + +# # Checking parse returns what we would expect +# pars = self.pat4.parse_criteria({'spp_code': 'species', 'count': 'count', +# 'x': 1}) +# self.assertTrue(pars[1] == 'spp_code') +# self.assertTrue(pars[2] == 'count') + +# # Test that energy, mass and count col are None +# pars = self.pat4.parse_criteria({'spp_code': 'species', +# 'y': 'whole'}) +# self.assertTrue((pars[2] == None) and (pars[3] == None) and (pars[4] == +# None)) + +# # If species is not specified correctly an error is thrown +# self.assertRaises(ValueError, self.pat3.parse_criteria, {'spp_col' +# :'species'}) +# # Make sure if count is not passed, no error is thrown +# self.pat3.parse_criteria({'spp_code': 'species'}) + +# # Check energy and mass returns +# pars = self.pat5.parse_criteria({'spp_code': 'species', 'count': +# 'count', 'energy': 'energy'}) + +# self.assertTrue(pars[3] == 'energy') +# self.assertTrue(pars[4] == None) + +# # Check that combinations in empty dict if no criteria given +# pars = self.pat5.parse_criteria({'spp_code': 'species', 'count': +# 'count'}) +# self.assertTrue(pars[5] == [{}]) + +# # TODO: Test that error is thrown if step < prec + +# def test_sar(self): + +# # Checking that sar function returns correct S0 for full plot +# sar = self.pat3.sar(('x', 'y'), [(1,1)], {'spp_code': 'species', +# 'count': 'count'}) +# self.assertTrue(sar[0]['items'][0] == 5) + +# # Checking for correct result for sar +# sar = self.pat3.sar(('x', 'y'), [(1,1), (2,2)], {'spp_code': 'species', +# 'count': 'count'}) +# self.assertTrue(np.array_equal(sar[1][1], np.array([3,3,2,3]))) +# sar = self.pat4.sar(('x', 'y'), [(1,1), (1,2), (3,2)], {'spp_code': +# 'species', 'count': 'count'}, form='sar') +# self.assertTrue(np.array_equal(sar[1][2], np.array([3,3,2,2,3,1]))) + +# # Checking for correct result for ear +# ear = self.pat3.sar(('x', 'y'), [(1,1), (2,2)], {'spp_code': 'species', +# 'count': 'count'}, form='ear') +# self.assertTrue(np.array_equal(ear[1][1], np.array([0,1,0,0]))) + +# # Test that returned areas are correct +# sar = self.pat1.sar(('x', 'y'), [(1,1)], {'spp_code': 'species', +# 'count': 'count'}) +# self.assertTrue(np.round(sar[0]['area'][0], decimals=2) == 0.06) +# self.assertTrue(sar[0]['items'][0] == 2) + +# def test_universal_sar(self): + +# # Check that it returns the right length +# criteria = {'spp_code': 'species', 'count' : 'count'} +# div_cols = ('x', 'y') +# vals = self.pat8.universal_sar(div_cols, [(1,1), (1,2), (2,2), (2,4), +# (4,4)], criteria) +# self.assertTrue(len(vals) == 3) + +# # If (1,1) is not passed in it should have a length of zero +# vals = self.pat8.universal_sar(div_cols, [(1,2), (2,2)], criteria) +# self.assertTrue(len(vals) == 0) + +# # If (1,1) is not passed in but include_full == True should have len +# # equal to 1 +# vals = self.pat8.universal_sar(div_cols, [(1,2), (2,2), (2,4)], +# criteria, +# include_full=True) +# self.assertTrue(len(vals) == 2) + +# # Test that I get the correct z-value back +# vals = self.pat8.universal_sar(div_cols, [(1,1), (1,2), (2,2)], +# criteria) 
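# (The 0.3390 checked next is presumably the z value, i.e. the local SAR
# slope, for the single (A, A/2, A/4) triplet formed by the divisions
# (1,1), (1,2), (2,2) on this grid.)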
+# self.assertTrue(np.round(vals['z'][0], decimals=4) == 0.3390) + +# # If I pass in something other than a halving I should still get +# # something back +# vals = self.pat8.universal_sar(div_cols, [(1,1), (2,2), (2,4), (4,4)], +# criteria) +# self.assertTrue(len(vals) == 2) + +# def test_comm_sep(self): + +# # Create result recarray +# comm = self.pat9.comm_sep({'plot1': (0,0), 'plot2': (0,1), +# 'plot3': (3,4)}, +# {'spp_code': 'species', 'count': 'count'}) + +# # Create result recarray with dec degree locs +# comm_decdeg = self.pat9.comm_sep({'plot1': (9.1,79.0), +# 'plot2': (9.2,79.5), 'plot3': (12.7,50)}, +# {'spp_code': 'species', 'count': 'count'}, +# loc_unit='decdeg') + +# # Check distances +# dist_sort = np.sort(comm['dist']) +# np.testing.assert_array_almost_equal(dist_sort, np.array((1,4.242,5)), +# 3) + +# # Check distances dec degree +# # TODO: Find exact third party comparison formula - formulas online use +# # different radii, etc. and give approx same answer +# dist_sort = np.sort(comm_decdeg['dist']) +# #np.testing.assert_array_almost_equal(dist_sort, +# # np.array((56.058,3193.507, +# # 3245.820)), 3) + +# # Check species in each plot +# spp_sort = np.sort(np.array(list(comm['spp-a']) + list(comm['spp-b']))) +# np.testing.assert_array_equal(spp_sort, np.array((0,0,3,3,4,4))) + +# # Check Sorensen - 2 zeros from empty plot1 +# sor_sort = np.sort(comm['sorensen']) +# np.testing.assert_array_almost_equal(sor_sort, +# np.array((0,0,0.571428571)), 5) + +# # Check Jaccard - 2 zeros from empty plot1 +# jac_sort = np.sort(comm['jaccard']) +# np.testing.assert_array_almost_equal(jac_sort, np.array((0,0,0.4)), 5) + +# def test_o_ring(self): + +# # Check standard case, no min max, no edge correction, no criteria +# # Tests that distances and repeats for count col are correct +# result_list = self.pat1.o_ring(('x','y'), [0,.11,.2], +# {'spp_code': 'species', 'count': 'count'}) + +# np.testing.assert_array_equal(result_list[0][2][0], np.array((8,4))) +# np.testing.assert_array_equal(result_list[0][2][1], np.array((2,4))) + +# # Check standard case, no min max, no edge correction, with division +# result_list = self.pat1.o_ring(('x','y'), [0,.11,.2], +# {'spp_code': 'species', 'count': 'count', +# 'y': 2}) + +# # - First half of y, both species +# np.testing.assert_array_equal(result_list[0][2][0], np.array((6,0))) +# np.testing.assert_array_equal(result_list[0][2][1], np.array((0,0))) + +# # - Second half of y, both species +# np.testing.assert_array_equal(result_list[1][2][0], np.array((0,0))) +# np.testing.assert_array_equal(result_list[1][2][1], np.array((2,0))) + +# # Check edge correction - check only first species +# # Almost equal required due to float division +# result_list = self.pat1.o_ring(('x','y'), [0,.05,.1], +# {'spp_code': 'species', 'count': 'count'}, +# edge_correct=True) +# np.testing.assert_array_almost_equal(result_list[0][2][0], +# np.array((8,18))) + +# # Check density - check only second species +# print 'here ' +# result_list = self.pat1.o_ring(('x','y'), [0,.05,.1], +# {'spp_code': 'species', 'count': 'count'}, +# density=True) +# np.testing.assert_array_almost_equal(result_list[0][2][1], +# np.array((1358.12218105,0))) + +# def test_ssad(self): + +# # Check that ssad does not lose any individuals +# ssad = self.pat2.ssad({'spp_code': 'species', 'count': 'count'}) +# sad = self.pat2.sad({'spp_code': 'species', 'count': 'count'}) +# sum_ssad = np.array([sum(val) for val in ssad[1].itervalues()]) +# self.assertTrue(sum(sad[0][1]) == sum(sum_ssad)) + +# ssad 
= self.pat6.ssad({'spp_code': 'species', 'count': 'count'}) +# sad = self.pat6.sad({'spp_code': 'species', 'count': 'count'}) +# sum_ssad = np.array([sum(val) for val in ssad[1].itervalues()]) +# self.assertTrue(sum(sad[0][1]) == sum(sum_ssad)) + +# # Manual checks of correct ssad +# ssad = self.pat2.ssad({'spp_code': 'species', 'count': 'count', 'x': +# 2, 'y': 2}) +# self.assertTrue(set(ssad[1]['a']) == {1, 0, 1, 0}) +# self.assertTrue(set(ssad[1]['b']) == {1, 4, 0, 1}) +# self.assertTrue(set(ssad[1]['c']) == {0, 0, 3, 3}) +# self.assertTrue(set(ssad[1]['d']) == {3, 1, 1, 1}) + +# ssad = self.pat6.ssad({'spp_code': 'species', 'count': 'count', 'x' : +# 2, 'y': 2}) +# self.assertTrue(set(ssad[1]['a']) == {1, 0, 1, 0}) +# self.assertTrue(set(ssad[1]['b']) == {1, 4, 0, 1}) +# self.assertTrue(set(ssad[1]['c']) == {0, 0, 3, 3}) +# self.assertTrue(set(ssad[1]['d']) == {3, 1, 1, 1}) + +# def test_ied(self): + +# # Test correct length of result +# eng = self.pat5.ied({'spp_code': 'species', 'count': 'count', +# 'energy': 'energy'}) +# self.assertTrue(len(eng[0][1]) == 6) + +# # Test error if energy column is missing +# self.assertRaises(ValueError, self.pat5.ied, +# {'spp_code': 'species', 'count': 'count'}) + +# # Test normalize is working +# eng = self.pat5.ied({'spp_code': 'species', 'count': 'count', +# 'energy': 'energy', 'x': 2}) +# self.assertTrue(np.array_equal(eng[1][1], np.array([1]))) +# self.assertTrue(len(eng[0][1]) == 5) + +# # Test mass column and normalize +# eng = self.pat5.ied({'spp_code': 'species', 'count': 'count', +# 'mass' : 'mass'}, exponent=1, normalize=False) +# self.assertTrue(np.array_equal(eng[0][1], np.array([17,17,12,23,45, +# 110]))) + +# # Test that energy overrides mass +# eng = self.pat5.ied({'spp_code': 'species', 'count': 'count', +# 'mass' : 'mass', 'energy' : 'energy'}, normalize=False) +# self.assertTrue(np.array_equal(eng[0][1], np.array([.5,.5,2,3,4,5]))) + +# def test_sed(self): + +# # Check correct result +# eng = self.pat5.sed({'spp_code': 'species', 'count': 'count', +# 'energy': 'energy'}) +# self.assertTrue(np.array_equal(eng[0][1]['grt'], +# np.array([1,1,4,6]))) +# self.assertTrue(np.array_equal(eng[0][1]['rty'], +# np.array([8,10]))) + +# eng = self.pat5.sed({'spp_code': 'species', 'count': 'count', +# 'energy': 'energy', 'x': 2}) +# self.assertTrue(np.array_equal(eng[1][1]['rty'], np.array([1]))) +# self.assertTrue(len(eng[1][1]) == 2) + +# if __name__ == "__main__": +# unittest.main() From e1591f2f2fd06adbd66e485d2aba552c3119c2df Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 27 Mar 2014 15:34:38 -0700 Subject: [PATCH 172/343] Fixed empirical_cdf plotting --- macroeco/main/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 1112f58..f408065 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -463,8 +463,8 @@ def calc_func(model, df, shapes): # CDF x = core_result['y'].values - emp_cdf = emp.empirical_cdf(x) - df = pd.DataFrame({'x': x, 'empirical': emp_cdf}) + df = emp.empirical_cdf(x) + df.columns = ['x', 'empirical'] def calc_func(model, df, shapes): return eval("mod.%s.cdf(df['x'], *shapes)" % model) From f7ca30979f8d57ede910a865baecd34669ad7dd3 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 27 Mar 2014 16:09:26 -0700 Subject: [PATCH 173/343] cnbinom fit returns b --- macroeco/models/_distributions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py 
index 4996f6a..b33497d 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -508,7 +508,7 @@ def fit_mle(self, data, b=None, k_range=(0.1, 100, 0.1)): if not b: b = np.sum(data) - return mu, _solve_k_from_mu(data, k_range, _cnbinom_nll, mu, b) + return mu, _solve_k_from_mu(data, k_range, _cnbinom_nll, mu, b), b def _pmf(self, x, mu, k_agg, b): return np.exp(self._logpmf(x, mu, k_agg, b)) From 4bbe80f260e83121eecc90fc470956aa5f055c64 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 27 Mar 2014 16:09:40 -0700 Subject: [PATCH 174/343] AIC doesn't break main --- macroeco/main/main.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index f408065..c68979f 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -288,7 +288,8 @@ def _fit_models(options, core_results): fits = _get_fits(data, model) # TODO: values is probably better moved to output part values = _get_values(data, model, fits) - stat_names, stats = _get_comparison_statistic(values, fits) + + stat_names, stats = _get_comparison_statistic(data, model, fits) fit_result[model] = [fits, values, stat_names, stats] fit_results.append(fit_result) @@ -310,8 +311,12 @@ def _get_values(data, model, fits): return values -def _get_comparison_statistic(data, fits): - return ['AIC'], [0] + +def _get_comparison_statistic(data, model, fits): + # Just calculating AIC in this function + + aic = comp.AIC(data, eval("mod.%s" % model + "(*fits)")) + return ['AIC'], aic def _save_results(options, module, core_results, fit_results): From 7cf4aa4b982802d5b3e60d7b6ab08353826416cd Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 27 Mar 2014 16:47:45 -0700 Subject: [PATCH 175/343] Prepare for format functions in main --- macroeco/main/main.py | 64 ++++++++++++++++++++++++--------------- macroeco/misc/__init__.py | 1 + 2 files changed, 41 insertions(+), 24 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index c68979f..fdadd07 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -13,16 +13,15 @@ import matplotlib.pyplot as plt import matplotlib as mpl -from ..misc import setup_log from .. import empirical as emp from .. import models as mod from .. import compare as comp +from .. import misc -from ..misc.rcparams import ggplot_rc -mpl.rcParams.update(ggplot_rc) +mpl.rcParams.update(misc.rcparams.ggplot_rc) -def main(param_path='parameters.txt', flat_output=False): +def main(param_path='parameters.txt'): """ Entry point function for analysis based on parameter files. @@ -30,9 +29,6 @@ def main(param_path='parameters.txt', flat_output=False): ---------- param_path : str Path to user-generated parameter file - flat_output : bool - Place all output in parameter directory instead of results - subdir. Default False. Only allowed if single run in parameters file. 
""" @@ -41,14 +37,10 @@ def main(param_path='parameters.txt', flat_output=False): raise IOError, "Parameter file not found at %s" % param_path # Get raw params and base options (non-run-dependent options) - params, base_options = _get_params_base_options(param_path, flat_output) - - # Confirm that flat_output is allowed - if flat_output and len(base_options['run_names']) > 1: - raise ValueError, "flat_output option only possible with a single run" + params, base_options = _get_params_base_options(param_path) # Start logging - log = setup_log(base_options['results_dir']) + log = misc.setup_log(base_options['results_dir']) log.info('Starting analysis') # Do analysis for each run @@ -57,12 +49,15 @@ def main(param_path='parameters.txt', flat_output=False): options = dict(params[run_name]) # All parameters from this run options.update(base_options) # Add base parameters options['run_dir'] = os.path.join(base_options['results_dir'],run_name) - _do_analysis(options) + if 'format' in options['analysis']: + _do_format(options) + else: + _do_analysis(options) log.info('Finished run %s' % run_name) log.info('Finished analysis successfully') -def _get_params_base_options(param_path, flat_output): +def _get_params_base_options(param_path): # Read parameter file into params object params = configparser.ConfigParser() @@ -73,15 +68,13 @@ def _get_params_base_options(param_path, flat_output): # Setup param_dir and results_dir, get run_names param_dir = os.path.abspath(os.path.dirname(param_path)) - if flat_output: - results_dir = param_dir - run_names = [''] - else: - results_dir = os.path.join(param_dir, 'results') - if os.path.isdir(results_dir): - shutil.rmtree(results_dir) - os.makedirs(results_dir) - run_names = params.sections() + results_dir = os.path.join(param_dir, 'results') + + if os.path.isdir(results_dir): + shutil.rmtree(results_dir) + os.makedirs(results_dir) + + run_names = params.sections() # Create options dict base_options = {} @@ -92,6 +85,29 @@ def _get_params_base_options(param_path, flat_output): return params, base_options +def _do_format(options): + """ + Notes + ----- + All format functions take the same parameters: original csv path, output + csv path, and keyword arguments. + + """ + + analysis_name = options['analysis'] + + if analysis_name == 'format_dense': + misc.format_dense() + elif analysis_name == 'format_columnar': + misc.format_columnar() + elif analysis_name == 'format_grid': + misc.format_grid() + elif analysis_name == 'format_transect': + misc.format_transect() + else: + raise NameError, "Cannot format data using analysis %s" % analysis_name + + def _do_analysis(options): """ Do analysis for a single run, as specified by options. 
diff --git a/macroeco/misc/__init__.py b/macroeco/misc/__init__.py
index 0789bdb..5e6a557 100644
--- a/macroeco/misc/__init__.py
+++ b/macroeco/misc/__init__.py
@@ -1 +1,2 @@
 from .misc import *
+from .rcparams import *

From a562297c19a4671cc0b065a9b6684504027649eb Mon Sep 17 00:00:00 2001
From: Justin Kitzes
Date: Thu, 27 Mar 2014 16:59:48 -0700
Subject: [PATCH 176/343] Outline of gen_sar curve

---
 macroeco/models/__init__.py |  2 +-
 macroeco/models/_curves.py  | 59 +++++++++++++++++++++++++++----------
 2 files changed, 44 insertions(+), 17 deletions(-)

diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py
index 523fc21..0c8e02d 100644
--- a/macroeco/models/__init__.py
+++ b/macroeco/models/__init__.py
@@ -40,4 +40,4 @@
 from _distributions import (geom, geom_uptrunc, nbinom, cnbinom,
                             expon, expon_uptrunc)
 
-from ._curves import (power_law, mete_sar)
+from ._curves import (power_law)
diff --git a/macroeco/models/_curves.py b/macroeco/models/_curves.py
index 8b815b0..532f5a6 100644
--- a/macroeco/models/_curves.py
+++ b/macroeco/models/_curves.py
@@ -142,30 +142,57 @@ def _vals(self, x, c, z):
 power_law = power_law_gen(name='power_law', parameters='c,z')
 power_law.__doc__ = power_law.__doc__.format(_doc_methods, _doc_parameters)
 
-class mete_sar_gen(curve):
+
+class gen_sar_gen(curve):
     """
-    The SAR predicted by the Maximum Entropy Theory of Ecology
+    INCOMPLETE NEEDS CONTINUED WORK
+
+    A generic SAR based on a combination of an SAD and SSAD
 
     .. math::
 
        y = c x^z
 
-    or equivalently
+    The generic SAR may be used either for downscaling, when values of A are
+    less than A0, or upscaling, when values of A are greater than A0.
+    Downscaling creates the traditional SAR known to ecologists, while
+    upscaling is particularly useful for estimating large-scale species
+    richness from small-scale plot data.
 
-    .. math::
+    A keyword argument iterative is available for the generic SAR (default is
+    False). If True, the SAR is calculated at successive A values, with the
+    result at each value of A used as the base values of S0 and N0 for the
+    subsequent calculation. The iterative SAR form is a generalization of the
+    universal SAR proposed by Harte et al [#]_.
 
-       \log(y) = \log(c) + z \log(x)
-
-    {0}
+    Methods
+    -------
+    vals(S0, N0, A, SAD_model, SSAD_model)
+        Calculate SAR given starting values and two models. See notes.
 
-    {1}
-    S0, N0
-        Parameters: Initial species richness and community abundance at largest
-        scale
+    Parameters
+    ----------
+    S0 : float
+        Species richness at A0
+    N0 : float
+        Community abundance at A0
+    A : iterable
+        Areas at which to calculate SAR (first element is A0)
+    SAD_model : object
+        Frozen distribution from macroeco.models
+    SSAD_model : object
+        Frozen distribution from macroeco.models
     iterative : bool
         If true, SAR calculation for subplots are based on variables for next
         larger area instead of initial plot variables. Default False.
 
+    References
+    ----------
+    .. [#]
+       Harte, J., Smith, A. B., & Storch, D. (2009). Biodiversity scales from
+       plots to biomes with a universal species-area curve. Ecology Letters,
+       12(8), 789-797.
+
     """
 
     def _vals(self, x, S0, N0, iterative=False):
@@ -188,15 +215,15 @@ def _single_step(self, S0, N0, a):
             S1 = S0
             N1 = N0
         elif a < 1:  # "Normal" downscale
-            pass
+            S1 = S0
+            N1 = N0
         else:  # Upscale solver
-            pass
+            S1 = S0
+            N1 = N0
 
         return S1, N1
 
-mete_sar = mete_sar_gen(name='mete_sar', parameters='S0,N0')
-mete_sar.__doc__ = mete_sar.__doc__.format(_doc_methods, _doc_parameters)
-
+gen_sar = gen_sar_gen(name='gen_sar', parameters='S0,N0')

From 8fd4371a3c486ff5180db9c9fd71acbbf7feaba9 Mon Sep 17 00:00:00 2001
From: Justin Kitzes
Date: Thu, 27 Mar 2014 17:25:08 -0700
Subject: [PATCH 177/343] Add/update module documentation and make all imports explicit

---
 doc/index.rst             |  1 +
 doc/misc.rst              |  1 +
 macroeco/__init__.py      | 39 +++++++++++++++++++++++++++++++++++++++
 macroeco/misc/__init__.py | 22 ++++++++++++++++++++--
 macroeco/misc/misc.py     |  4 ++--
 5 files changed, 63 insertions(+), 4 deletions(-)
 create mode 100644 doc/misc.rst

diff --git a/doc/index.rst b/doc/index.rst
index 67abb25..47f75fe 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -14,6 +14,7 @@ Welcome to macroeco.
    empirical
    models
    compare
+   misc
    main
 
diff --git a/doc/misc.rst b/doc/misc.rst
new file mode 100644
index 0000000..ee73cfa
--- /dev/null
+++ b/doc/misc.rst
@@ -0,0 +1 @@
+.. automodule:: macroeco.misc
diff --git a/macroeco/__init__.py b/macroeco/__init__.py
index dd7c5fa..882bdb9 100644
--- a/macroeco/__init__.py
+++ b/macroeco/__init__.py
@@ -1,5 +1,43 @@
 """
+===============================================
 Macroeco: Ecological pattern analysis in Python
+===============================================
+
+Macroeco provides a comprehensive set of functions for analyzing empirical
+patterns in data, predicting patterns using theory and models, and comparing
+empirical results to theory. Many major macroecological patterns can be
+analyzed using this package, including the species abundance distribution, the
+species and endemics area relationships, several measures of beta diversity,
+and many others.
+
+Extensive documentation for macroeco, including tutorials and a reference
+guide, is available at http://macroeco.org.
+
+The package is organized into five submodules.
+
+Empirical provides a Patch class for reading data and metadata from an
+empirical census and functions that calculate empirical macroecological metrics
+based on that data.
+
+Models provides a set of distributions and curves that have been proposed by
+basic theory to describe macroecological metrics.
+
+Compare provides functions for comparing the empirical and modeled results.
+
+Misc provides a set of miscellaneous functions, including several that aid in
+formatting census data for use by functions in the empirical module.
+
+Main provides a programmatic interface to this package, known as Macroeco
+Desktop, that allows a user to specify all of the parameters for an analysis in
+a single parameters file, which is then executed, and results saved, with no
+additional intervention needed.
+
+Macroeco was developed at the University of California, Berkeley, by Justin
+Kitzes and Mark Wilber. Additional contributors include Chloe Lewis and Ethan
+White. The development of macroeco has been supported by the National Science
+Foundation, the Moore Foundation, and the Berkeley Institute for Global Change
+Biology.
+ """ __author__ = "Justin Kitzes and Mark Wilber" @@ -13,4 +51,5 @@ import empirical import models import compare +import main import misc diff --git a/macroeco/misc/__init__.py b/macroeco/misc/__init__.py index 5e6a557..45855cc 100644 --- a/macroeco/misc/__init__.py +++ b/macroeco/misc/__init__.py @@ -1,2 +1,20 @@ -from .misc import * -from .rcparams import * +""" +=============================== +Misc (:mod:`macroeco.misc`) +=============================== + +This module contains miscellaneous functions that support the functions of +other modules of macroeco. + +.. autosummary:: + :toctree: generated/ + + setup_log + inherit_docstring_from + doc_sub + log_start_end + +""" + +from .misc import setup_log, inherit_docstring_from, doc_sub, log_start_end +from .rcparams import ggplot_rc diff --git a/macroeco/misc/misc.py b/macroeco/misc/misc.py index e4a5bab..57578ce 100644 --- a/macroeco/misc/misc.py +++ b/macroeco/misc/misc.py @@ -105,9 +105,9 @@ def inherit_docstring_from(cls): method of the same name from the class `cls`. If the decorated method has no docstring, it is simply given the - docstring of `cls`s method. + docstring of cls method. - From scipy.misc.doccer + Extracted from scipy.misc.doccer. """ def _doc(func): From 0661ed853b48eeed3066285f02075f96473f8094 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 27 Mar 2014 20:50:19 -0700 Subject: [PATCH 178/343] Updates to License and Readme --- LICENSE.txt | 30 +++++++++++++++--------------- README.md | 32 +++++--------------------------- 2 files changed, 20 insertions(+), 42 deletions(-) diff --git a/LICENSE.txt b/LICENSE.txt index 5081918..6a9a0f8 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,22 +1,22 @@ -Copyright (c) 2013-2014, The Regents of the University of California +Copyright (c) 2012-2014, The Regents of the University of California All rights reserved. -Redistribution and use in source and binary forms, with or without +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: -- Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation +- Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.md b/README.md index 10687e8..ee02844 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,8 @@ -macroeco -======== +Macroeco: Ecological pattern analysis in Python +----------------------------------------------- -Overview --------- +Macroeco is a Python package that provides a comprehensive set of functions for analyzing empirical patterns in ecological data, predicting patterns from theory, and comparing empirical results to theory. Many major macroecological patterns can be analyzed using this package, including the species abundance distribution, the species and endemics area relationships, several measures of beta diversity, and many others. -macroeco is a Python package for pattern-based ecological analysis. The package -was developed at UC Berkeley by Justin Kitzes, Mark Wilber, and Chloe Lewis, -and is maintained by Justin Kitzes. +Extensive documentation for macroeco, including detailed installation instructions, tutorials, and a reference guide, is available at http://macroeco.org. The most recent stable version of the macroeco package can be installed from PyPi (`pip install macroeco`). For users who do not program in Python, a standalone application called Macroeco Desktop, which provides most of the functionality of macroeco through a simple interface that requires no programming, is also available. -There is no separate documentation available yet, although the docstrings for -various classes and functions are relatively complete. Refer also to the -software [ecopattern](http://github.com/jkitzes/ecopattern) for examples of the -package in use. - -Installation ------------- - -Simply clone the macroeco directory to a location on your PYTHONPATH, including -as a subdirectory of your current work folder. A `setup.py` file is coming in a -future release. - -Credits -------- -- Authors: Justin Kitzes, Mark Wilber, Chloe Lewis -- Copyright: Copyright 2012, Regents of the University of California -- License: BSD 2-clause -- Maintainer: Justin Kitzes -- Email: jkitzes@berkeley.edu -- Status: Development +The current version of macroeco was developed at the University of California, Berkeley by Justin Kitzes and Mark Wilber and is maintained by Justin Kitzes. Other contributors include Chloe Lewis and Ethan White. 
The development of macroeco has been supported by the National Science Foundataion, the Gordon and Betty Moore Foundation, and the Berkeley Institute for Global Change Biology. From 366301f26e7f6d0876e54185dfefb341f58ef8c4 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 27 Mar 2014 20:54:26 -0700 Subject: [PATCH 179/343] Fix License indent --- LICENSE.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/LICENSE.txt b/LICENSE.txt index 6a9a0f8..9b0d6e6 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -4,11 +4,11 @@ All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: -- Redistributions of source code must retain the above copyright notice, +- Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. +- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE From d28c6baba7405c5c799b32443713f8c016245709 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 27 Mar 2014 21:29:36 -0700 Subject: [PATCH 180/343] Add additional logging, ensure thread logs captured --- macroeco/main/main.py | 5 +++++ macroeco/misc/__init__.py | 5 ++++- macroeco/misc/misc.py | 5 +---- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index fdadd07..97cbad4 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -4,6 +4,7 @@ import shutil import inspect import configparser +import threading as thread from twiggy import log log = log.name('meco') @@ -13,6 +14,7 @@ import matplotlib.pyplot as plt import matplotlib as mpl +from .. __init__ import __version__ from .. import empirical as emp from .. import models as mod from .. 
import compare as comp @@ -41,6 +43,8 @@ def main(param_path='parameters.txt'): # Start logging log = misc.setup_log(base_options['results_dir']) + log.info('Running macroeco v%s' % __version__) + log.info('Parameters file at %s' % os.path.abspath(param_path)) log.info('Starting analysis') # Do analysis for each run @@ -55,6 +59,7 @@ def main(param_path='parameters.txt'): _do_analysis(options) log.info('Finished run %s' % run_name) log.info('Finished analysis successfully') + log.info('Results available at %s' % options['param_dir']) def _get_params_base_options(param_path): diff --git a/macroeco/misc/__init__.py b/macroeco/misc/__init__.py index 45855cc..a6fe090 100644 --- a/macroeco/misc/__init__.py +++ b/macroeco/misc/__init__.py @@ -16,5 +16,8 @@ """ -from .misc import setup_log, inherit_docstring_from, doc_sub, log_start_end +from .misc import (setup_log, _thread_excepthook, log_start_end, + inherit_docstring_from, doc_sub) from .rcparams import ggplot_rc + +_thread_excepthook() # Make desktop app catch and log sys except from thread diff --git a/macroeco/misc/misc.py b/macroeco/misc/misc.py index 57578ce..bb77267 100644 --- a/macroeco/misc/misc.py +++ b/macroeco/misc/misc.py @@ -38,9 +38,6 @@ def log_uncaught(type1, value1, traceback1): log.options(suppress_newlines=False).critical('\n'+tb_str) sys.excepthook = log_uncaught - # Make threads use sys.excepthook from parent process - _installThreadExcepthook() - return log @@ -77,7 +74,7 @@ def _logger_better_time(gmtime=None): return time.strftime("%Y/%m/%d %H:%M:%S %p", time.localtime()) -def _installThreadExcepthook(): +def _thread_excepthook(): """ Make threads use sys.excepthook from parent process http://bugs.python.org/issue1230540 From 931e911045f6db329474ed5c7cfacdcb68cf66d3 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 27 Mar 2014 21:29:49 -0700 Subject: [PATCH 181/343] Fix syntax bug in geom logpmf --- macroeco/models/_distributions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index b33497d..fd40b4a 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -228,7 +228,7 @@ def _pmf(self, x, p): return (1-p)**x * p def _logpmf(self, x, p): - return x*np.log(1-p) + log(p) + return x*np.log(1-p) + np.log(p) def _cdf(self, x, p): x = np.floor(x) From a9379014e50e61560c7651c5287a0323af1e2f03 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Fri, 28 Mar 2014 09:54:43 -0700 Subject: [PATCH 182/343] Added dense formatting --- macroeco/misc/format_data.py | 1073 +++------------------------------- 1 file changed, 68 insertions(+), 1005 deletions(-) diff --git a/macroeco/misc/format_data.py b/macroeco/misc/format_data.py index de9b9e6..0bb66f1 100644 --- a/macroeco/misc/format_data.py +++ b/macroeco/misc/format_data.py @@ -1,1014 +1,77 @@ -#!/usr/bin/python - -'''This module contains 4 separate classes, each built to handle a -canonical data type - -This module provides the user with some formatting functions but does provide -the user with all formatting functions that may be required. This module is -not a substitute for thorough examination of ones data to remove irrelevant -data''' import numpy as np -from matplotlib.mlab import csv2rec -import form_func as ff -from numpy.lib.recfunctions import drop_fields -import csv - - -class Columnar_Data: - ''' - This is the data form that the macroeco software package wants the data - file in. 
All other canonical data sets are converted to columnar data and - then turned into Columnar_Data objects. - - Examples of columnar data include BCIS, LUQU, and COCO - - Multiple data files must have same format if they are to be merged - - ''' - - def __init__(self, datalist, delimiter=',', missingd=None,\ - delete_missing=False, archival=True): - ''' - This __init__ method takes in data and stores it in rec_arrays. - If specified, it will located missing data points and remove them - from the data set. - - Parameters - ---------- - datalist : string, list of strings, or list of ndarrays. - Data filenames or list of data arrays - - delimiter : string - The file delimiter. Default is ',' - - missingd : dict - Dictionary mapping munged column names to field values which - signify that the field does not contain actual data and should be - masked, e.g. '0000-00-00' or 'unused'. The missing value must be - represented as a string. - - delete_missing : bool - If True, deletes all of the missing values. If False, only deletes - the NaNs from the data. - - archival : bool - If True, a copy of self.columnar_data is made and stored in - self.columnar_archival. If dataset is very large, set to False. - - Note - ---- - If column type is integer, missing values are set to -1. If column - type is float, missing values are set to NaN. If column type is - string, missing values are set to ''. If column type is object, - missing values are set to None. - - ''' - if type(datalist) == str: - datalist = [datalist] - - if np.all(np.array([type(x) == str for x in datalist])): - self.columnar_data = [] - self.data_names = [] - for file_name in datalist: - self.columnar_data.append(csv2rec(file_name, delimiter=delimiter,\ - missingd=missingd)) - self.data_names.append(file_name) - if missingd != None: - if delete_missing: - trun_data = [] - for data in self.columnar_data: - for key in missingd.iterkeys(): - try: - # Missing float - notNaN = (False == np.isnan(data[key])) - except: - notNaN = np.ones(len(data[key]), dtype=bool) - notBlank = np.array([it != '' for it in data[key]]) - notMinusOne = (data[key] != -1)# Missing int - # Missing other - notNone = np.array([i != None for i in data[key]]) - ind = np.bitwise_and(notNaN, notBlank) - ind = np.bitwise_and(ind, notMinusOne) - ind = np.bitwise_and(ind, notNone) - data = data[ind] - trun_data.append(data) - self.columnar_data = trun_data - else: - trun_data = [] - for data in self.columnar_data: - for key in missingd.iterkeys(): - try: - notNaN = (False == np.isnan(data[key])) - except: - notNaN = np.ones(len(data[key]), dtype=bool) - data = data[notNaN] - trun_data.append(data) - self.columnar_data = trun_data - elif np.all(np.array([type(x) == np.ndarray for x in datalist])): - self.columnar_data = datalist - - if archival: - self.columnar_archival = [np.copy(data) for data in - self.columnar_data] - else: - self.columnar_archival = [] - - def reset_columnar_data(self): - ''' - Resets self.columnar_data to self.columnar_archival - - Need to be careful about excessive memory usage! 
- ''' - if len(self.columnar_archival) == 0: - raise ValueError("The self.columnar_archival attribute of this %s" - % (self.__class__.__name__) + " object has not" - + " been initialized") - else: - self.columnar_data = [np.copy(data) for data in - self.columnar_archival] - - def subset_data(self, subset={}): - ''' - Subset any given column of the data - - Parameters - ---------- - subset : dict - Dictionary of permanent subset to data, {'column_name': - 'condition'}, which will limit all analysis to records in which - column_name meets the condition, ie, {'year': ('==', 2005), 'x': - [('>', 20), ('<', 40)]} restricts analysis to year 2005 and x - values between 20 and 40. These conditions can also be passed to - the individual methods, but subsetting the data table up front may - save analysis time. Subsetting on a string would look something - like {'name' : [('==', 'John'), ('==', 'Harry')]} - ''' - - - if subset != {}: - # Format column names - subset = ff.format_dict_names(subset) - - sub_data = [] - for data in self.columnar_data: - valid = np.ones(len(data), dtype=bool) - - for key, value in subset.iteritems(): - if type(value) is not type(['a']): # Make all iterables - value = [value] - - # Merge tuples into a string - merged_values = [] - for val in value: - try: # check if val[1] is a string - eval(str(val[1])) - merged_values.append(val[0] + str(val[1])) - except: - merged_values.append(val[0] + "'" + val[1] + "'") - - for this_value in merged_values: - try: - this_valid = eval("data[key]" + this_value) - valid = np.logical_and(valid, this_valid) - except ValueError: #If key can't be found do nothing - pass - - sub_data.append(data[valid]) - - self.columnar_data = sub_data - - def split_up_data_by_field(self, split_columns=None): - ''' - This function will take in the split-columns list and and split the - data into separate arrays based on the list. For example, if one were - to pass in dbh1, dbh2, dbh3 three copies of the data would be - made, each being identical except that each would only contain one of - the instances of dbh. One could also pass [(dbh1, recr1), (dbh2, recr2), - (dbh3, recr3)]. All other fields in split_columns will be excluded - other than the fields within the tuple under consideration. - - Parameters - ---------- - split_columns : list - a list of tuples specifying the columns by which to split the array - - Notes - ----- - Saves the split array as self.columnar_data. - - ''' - #Note: If they enter the wrong column name nothing will be removed - #Should I error check for this? - if split_columns != None: - # Check if split_columns is a list of strings. 
If so, change it - # into a list of tuples - split_columns = [(s,) if type(s) == str else tuple(s) for s in - split_columns] - - # Format the names in each tuple - split_columns = [tuple(ff.format_headers(nms)) for nms in - split_columns] - - split_data = [] - given_col_names = [] - for tup in split_columns: - for name in tup: - given_col_names.append(name) - given_col_names = np.array(given_col_names) - - - for data in self.columnar_data: - for tup in split_columns: - ind = np.ones(len(given_col_names), dtype=bool) - for name in tup: - ind = np.bitwise_and((name != given_col_names), ind) - remove_names = given_col_names[ind] - split_data.append(drop_fields(data, list(remove_names))) - self.columnar_data = split_data - - def change_column_names(self, change=None, changed_to=None): - ''' - This function takes a list of column names to be changed and a name - that they should be changed to - - Parameters - ---------- - change : list of tuples or strings - Each tuple or string contains column names. All the column names in - the first tuple will be changed to the first element in the - changed_to list and so on. - changed_to : list - A list of strings that contain the names that the columns in change - will be changed to. - - Notes - ----- - This function is useful if you would like to merge self.columnar_data - but the dtype.names are different. - - ''' - if change != None and changed_to != None: - if len(change) != len(changed_to): - raise ValueError('Length of params change and changed_to must' - + ' be equal') - # Convert to tuples if just received strings - change = [(x,) if type(x) == str else tuple(x) for x in change] - - # Format the names in each tuple - change = [tuple(ff.format_headers(nms)) for nms in change] - - for data in self.columnar_data: - column_names = np.array(data.dtype.names) - for i, name_tup in enumerate(change): - for name in name_tup: - find = np.where((name == column_names))[0] - if len(find) != 0: - max_len = np.max([len(x) for x in column_names]) - if max_len < len(changed_to[i]): - column_names = column_names.astype('S' + - str(len(changed_to[i]))) - column_names[find[0]] = changed_to[i] - data.dtype.names = tuple(column_names) - - def add_fields_to_data_list(self, fields_values=None, descr='S20'): - ''' - This functions adds given fields and values to the data list. If the - length of the value for a given keyword in one, it will be broadcast to - the length of self.columnar_data. Else an error will be thrown. - - Parameters - ---------- - fields_values : dict - dictionary with keyword being the the field name to be added and - the value being a tuple with length self.columnar_data specifying - the values to be added to each field in each data set. - descr : a single data type or a dictionary - A single value will be broadcast to appropriate length. The - dictionary must have the same keywords as fields_values and must be - the same length. Each keyword should lookup a dtype. - ''' - if fields_values != None: - self.columnar_data = ff.add_data_fields(self.columnar_data, - fields_values, descr=descr) - - def remove_columns(self, col_names=None): - ''' - This function will remove the all the columns within with names in - col_names from all the datasets in self.columnar_data. 
- - Parameters - ---------- - col_names : string or list - The name or names of columns to be removed - - ''' - - if col_names != None: - - if type(col_names) == str: - col_names = [col_names] - else: - col_names = list(col_names) - - # Format column names - col_names = ff.format_headers(col_names) - - removed_data = [] - for data in self.columnar_data: - removed_data.append(drop_fields(data, col_names)) - self.columnar_data = removed_data - - def fractionate_data(self, wid_len=None, step=None, col_names=None, - wid_len_old=None, min_old=None, step_old=None): - ''' - This function converts grid numbers to length measurements in - self.columnar_data - - Parameters - ---------- - wid_len : tuple - A tuple containing the the absolute length of the columns being - converted - step : tuple - The desierd precision (step or stride length) of each grid. The - first element in the step tuple corresponds with the first element - in the wid_len tuple and so on. - col_names : array-like object - An array-like object of strings giving the names of the columns - that will be fractionated - wid_len_old : tuple or None - If None, it assumes that a np.unique on datayears[col_name[i]] - gives a array that is the same length as np.arange(0, - wid_len_new[i], step=step_new[i]). If it doesn't, an error will be - thrown. If not None, expects the old maximum length for the given - columns. - min_old : tuple or None - Same as wid_len_old but the old minimum value for each given column - step_old : tuple or None - Same as wid_len_old but the old step (or stride length/spacing) for - each given column. - - ''' - if wid_len != None and step != None and col_names != None: - self.columnar_data = ff.fractionate(self.columnar_data, wid_len, step, - col_names, wid_len_old=wid_len_old, - min_old=min_old, step_old=step_old) - - - def merge_data(self): - ''' - This function concatenates the data files in data_list. The dtypes of - the data in data_list must be identical or this function will fail. - ''' - - self.merged_data = ff.merge_formatted(self.columnar_data) - - def output_merged_data(self, filename): - ''' - This function merges self.columnar_data and outputs the merged data. - - Parameters - ---------- - filename : string - The filename to be output - - ''' - #Merge data in case it has not been done - self.merge_data() - ff.output_form(self.merged_data, filename) - - def output_columnar_data(self, filenames): - ''' - This function outputs the self.columnar_data - - Parameters - ---------- - filenames : list - A list of filenames - - ''' - assert len(filenames) == len(self.columnar_data), "Number of " + \ - "filenames must be the same as the number of datasets" - for i, name in enumerate(filenames): - ff.output_form(self.columnar_data[i], name) - -class Grid_Data: - '''This class handles data should look like the EarthFlow data after a - census. It is a grid with species abundance data in each cell. - ex. - ARTDRA - 6 - GERTYR - 8 - - ''' - - def __init__(self, filenames, archival=True, spp_sep='\n'): - ''' - Pass in the file name(s) of the grid data that you want converted and - the number of columns in each grid. - - Parameters - ---------- - - filenames : str or list of strings - A filename or list of filenames - - archival : bool - If True, a copy of self.grid_data is made and stored in - self.grid_archival. If dataset is very large, set to False. - - ''' - #NOTE: Handle missing data!!!! 
- - if type(filenames) == str: - filenames = [filenames] - - assert np.all(np.array([name.split('.')[-1] for name in filenames]) ==\ - 'csv'), "Files must be csv" - - self.grid_data = [] - self.cols = [] - self.rows =[] - - for i, name in enumerate(filenames): - # Sometimes csv.reader reads an extra column so you have to read to - # whole file. Seems stupid to read in the file twice but oh well... - with open(name, 'rb') as csvreader: - reader = csv.reader(csvreader) - rows = [row for row in reader] - min_len = np.min([len(row) for row in rows]) - self.cols.append(min_len) - - self.grid_data.append(csv2rec(name, names=list(np.arange(0,\ - self.cols[i]).astype('S10')))) - self.rows.append(len(self.grid_data[i])) - - #Remove all '\n' from the end of each cell in grid - #Not technically necessary but just being clean - self.grid_data = remove_char(self.grid_data, char=spp_sep) - self.grid_data = remove_white_spaces(self.grid_data) - - if archival == True: - self.grid_archival = [np.copy(data) for data in self.grid_data] - else: - self.grid_archival = [] - - def reset_grid_data(self): - ''' - Resets self.grid_data to self.archival_data - - Need to be careful about excessive memory usage! - ''' - - if len(self.grid_archival) == 0: - raise ValueError("The self.grid_archival attribute of this %s" - % (self.__class__.__name__) + " object has not" - + " been initialized") - else: - self.grid_data = [np.copy(data) for data in self.grid_archival] - - def truncate_grid_cells(self, symbol=None): - ''' - This function will look at each cell in grid list and truncated the - string within the cell at AND after the first instance of a given - symbol. - - Parameters - ---------- - symbol : string or list of strings - The symbol at which to being truncation - - Notes - ----- - symbol is a keyword argument because format_grid_data script gives the - option to run every method. - - ''' - if symbol != None: - - if type(symbol) == str: - symbol = [symbol] - else: - symbol = list(symbol) - - for i in xrange(len(self.grid_data)): - for nm in self.grid_data[i].dtype.names: - for j in xrange(len(self.grid_data[i][nm])): - for sym in symbol: - ind = self.grid_data[i][nm][j].find(sym) - if ind != -1: - self.grid_data[i][nm][j] = \ - self.grid_data[i][nm][j][:ind] - - self.grid_data = remove_char(self.grid_data) - - # List of remove replace tuples? - def remove_and_replace(self, remove=None, replace=''): - ''' - Removes a string from a grid cell and replaces it with another one - - Paramters - --------- - remove : string - String to be removed - replace : string - String to replace removed string - - ''' - - if remove != None and replace != None: - for i in xrange(len(self.grid_data)): - for nm in self.grid_data[i].dtype.names: - for j in xrange(len(self.grid_data[i][nm])): - self.grid_data[i][nm][j] =\ - self.grid_data[i][nm][j].replace(remove, replace) - - def find_unique_spp_in_grid(self, spacer='-', spp_sep='\n'): - ''' - This function finds all of the unique species in the grid. - It assumes that your grid data is in the proper format. - - Parameters - ---------- - spacer : str - The character separating the species code from the species count. - Default value is '-' (n-dash) - - spp_sep : str - The character that separates a speces/count combination from - another species/count combination. 
Default value is '\n' - - ''' - self.unq_spp_lists = [] - for num, data in enumerate(self.grid_data): - spp_names = [] - for col in data.dtype.names: - for row in xrange(self.rows[num]): - if data[col][row].find(spacer) != -1: - nam_lst = data[col][row].split(spacer) - if len(nam_lst) == 2: - spp_names.append(nam_lst[0].strip()) - else: - spp_names.append(nam_lst[0].strip()) - for i in xrange(1, len(nam_lst) - 1): - spp_names.append(nam_lst[i].split(spp_sep)[1].\ - strip()) - self.unq_spp_lists.append(np.unique(np.array(spp_names))) - - def grid_to_dense(self, spacer='-', spp_sep='\n', archival=True): - ''' - This function converts a the list of gridded data sets into dense - data sets and stores them in dense_data. In addition, it - makes a Dense_Data object out of the newly converted data. - - Parameters - ---------- - spacer : str - The character separating the species code from the species count. - Default value is '-' (n-slash) - - spp_sep : str - The character that separates a speces/count combination from - another species/count combination. Default value is '\n' - - - ''' - - self.find_unique_spp_in_grid(spacer=spacer, spp_sep=spp_sep) - dense_data = [] - for i, data in enumerate(self.grid_data): - dtype_list = [('cell', np.int), ('row', np.int), ('column', np.int)] - for name in self.unq_spp_lists[i]: - tuple_type = (name, np.float) - dtype_list.append(tuple_type) - matrix = np.empty(self.rows[i] * self.cols[i], dtype=dtype_list) - #Iterate through the plot - count = 0 - for col in data.dtype.names: - for row in xrange(self.rows[i]): - matrix['cell'][count] = count - matrix['row'][count] = row - matrix['column'][count] = int(col) - for spp_name in self.unq_spp_lists[i]: - - # Check if cell has species. May be nested occurence! - matrix[spp_name][count] = 0 # Set base to 0 - start = data[col][row].find(spp_name) - if start == -1: # Nothing is there - pass # Count already set to zero - - else: # Something is there, but is it nested? - found = start - while found != -1: - # If this is true, it is nested - if (data[col][row][start + len(spp_name)] !=\ - spacer) or not(start == 0 or \ - data[col][row][start - 1] == spp_sep): - - pass - - else: # Actually a species, so add some - # abundance - - raw = data[col][row][start:].split(spacer)[1] - if raw.find(spp_sep) != -1: - tot_spp = raw.split(spp_sep)[0].strip() - else: - tot_spp = raw.split()[0].strip() - matrix[spp_name][count] += float(tot_spp) - found = data[col][row][start + 1 - :].find(spp_name) - start += found + 1 - count += 1 - dense_data.append(matrix) - self.Dense_Object = Dense_Data(dense_data, archival=archival) - - - def output_grid_data(self, filenames): - ''' - This function prints the data within self.grid_data with the given - filenames. +import pandas as pd - Parameters - ----------- - filenames : list - A list of filnames to which the data will be saved +def format_columnar(): + """ + """ + pass - ''' - assert len(filenames) == len(self.grid_data), "Number of filenames\ - must be the same as the number of datasets" - for i, data in enumerate(self.grid_data): - ff.output_form(data, filenames[i]) +def format_dense(data_path, non_spp_cols, delimiter=",", na_values="", + item_col="spp", count_col="count", nan_to_zero=False, drop_na=False): + """ + Formats dense data type to columnar data type. - -class Dense_Data: - '''This class handles data that are in the dense format. 
An example of the - dense format is a csv file that has columns named 'row' and 'column' and - the remainder of columns named after each species in the plot. The values - within each species column are the counts within the cell specified by the - columns names 'row' and 'column'. + Takes in a dense data type and converts into a stacked data type. - Note: Need to consider how I might break this class - ''' - - def __init__(self, datalist, delim=',', replace=None, archival=True): - ''' - - Parameters - ----------- - datalist : string, list of strings or list of arrays - List of filenames to be loaded or list of arrays to be set to - self.dense_data - delim : string - The file delimiter - replace : tuple - A tuple of length 2. The first element is a string that - represents the missing values that you would like to replace. The - second element is the value with which you would like to replace - the missing values. - archival : bool - If True, a copy of self.dense_data is made and stored in - self.dense_archival. If dataset is very large, set to False. - - ''' - #TODO: What kind of files could break this - if type(datalist) == str: - datalist = [datalist] - - if np.all(np.array([type(x) == str for x in datalist])): - self.dense_data = [] - if replace != None: - - assert len(replace) == 2, "Replace must contain 2 elements" - - for name in datalist: - self.dense_data.append(replace_vals(name, replace, - delim=delim)) - else: - for name in datalist: - data = csv2rec(name, delimiter=delim) - self.dense_data.append(data) - - elif np.all(np.array([type(x) == np.ndarray for x in datalist])): - self.dense_data = datalist - - if archival: - self.dense_archival = [np.copy(data) for data in - self.dense_data] - else: - self.dense_archival = [] - - def reset_grid_data(self): - ''' - Resets self.grid_data to self.archival_data - - Need to be careful about excessive memory usage! - ''' - - if len(self.dense_archival) == 0: - raise ValueError("The self.dense_archival attribute of this %s" - % (self.__class__.__name__) + " object has not" - + " been initialized") - else: - self.dense_data = [np.copy(data) for data in self.dense_archival] - - - def dense_to_columnar(self, spp_col_num, num_spp, count_col='count',\ - archival=True): - ''' - This function uses a function in form_func to convert dense data into - columnar data. Stores the columnar data as a Columnar Object. - - Parameters - ---------- - spp_col_num : int - The column number in the dense array where the spp_names begin - - num_spp : tuple or int - Number of species in each dataset in self.dense_data. If it is an - int, it will be broadcasted to the length of self.dense_data - - count_col : str - This string specifies the name of the count column. The default is - 'count'. - - ''' - columnar_data = ff.format_dense(self.dense_data, spp_col_num,\ - num_spp, count_col=count_col) - self.Columnar_Object = Columnar_Data(columnar_data, archival=archival) - - def output_dense_data(self, filenames): - ''' - This function prints the data within self.dense_data with the given - filenames. If self.dense_data has not been filled, error is thrown. - - Parameters - ---------- - filenames : list - A list of filenames to which the data will be saved - - ''' - - assert len(filenames) == len(self.dense_data), "Number of filenames\ - must be the same as the number of datasets" - for i, data in enumerate(self.dense_data): - ff.output_form(data, filenames[i]) - -class Transect_Data: - ''' - This class handles data that are similar to the Breeding Bird survey data. 
- One column has the species ID, one column has stop and all the other - columns have transects. This class can handle data with "n" nestings, not - just two. For example, the data could have location, transect and stop. - - The "stop" data should all be in consecutive columns - - ''' - - def __init__(self, filenames, delim=',', replace=None, archival=True): - ''' - - Parameters - ---------- - filenames : list - A list of filenames - delim : string - The file delimiter - replace : tuple - A tuple of length 2. The first element is a string which - represents the missing values that you would like to replace. The - second element is the value with which you would like to replace - the missing values. - archival : bool - If True, a copy of self.transect_data is made and stored in - self.transect_archival. If dataset is very large, set to False. - - - ''' - self.transect_data = [] - if type(filenames) == str: - filenames = [filenames] - - if replace != None: - - assert len(replace) == 2, "Replace must contain 2 elements" - replace = (str(replace[0]), replace[1]) - - for name in filenames: - self.transect_data.append(replace_vals(name, replace, - delim=delim)) - else: - for name in filenames: - data = csv2rec(name, delimiter=delim) - self.transect_data.append(data) - - if archival: - self.transect_archival = [np.copy(data) for data in - self.transect_data] - else: - self.transect_archival = [] - - def reset_transect_data(self): - ''' - Resets self.transect_data to self.transect_archival - - Need to be careful about excessive memory usage! - ''' - if len(self.transect_archival) == 0: - raise ValueError("The self.transect_archival attribute of this %s" - % (self.__class__.__name__) + " object has not" - + " been initialized") - else: - self.transect_data = [np.copy(data) for data in - self.transect_archival] - - def transect_to_columnar(self, stop_col_num, tot_stops, stop_name='stop', - count_name='count', archival=True): - ''' - This function takes transect data and convertes it into columnar data. - In addition it saves the columnar data as a Columnar_Data object. - - - Parameters - ---------- - stop_col_num : iterable or int - The column number where the stop counts begin (0 is the first - column). Can be len(transect_data) or length == 1. Broadcast if - length equals 1. - - tot_stops : iterable or int - The number of columns with stops. Can be len(transect_data) or - length == 1. Broadcast if length equals 1. - - stop_name : str - The name of the new stop column in the formatted data - - count_name : str - The name of the count column. Default is "count" - - - Notes - ----- - This function assumes that all data in self.transect_data are formatted - the same way. For example, the column that contains species names or - codes has the same name throughout all data sets. 
- - ''' - # Broadcast stop_col_num - stop_col_num = ff.broadcast(len(self.transect_data), stop_col_num) - tot_stops = ff.broadcast(len(self.transect_data), tot_stops) - - columnar_data = [] - for j, data in enumerate(self.transect_data): - nstops = tot_stops[j] - dtypes = data.dtype.descr[ : stop_col_num[j] ] - if (len(dtypes) + nstops) != len(data.dtype.names): - #Accounting for data fields after stops - end_dtypes = data.dtype.descr[(len(dtypes) + nstops) : ] - for x in end_dtypes: - dtypes.append(x) - dtypes.append((stop_name, 'S20')) - dtypes.append((count_name, np.int)) - column_data = np.empty(len(data) * nstops, dtype=dtypes) - for i in xrange(len(data)): - for name in column_data.dtype.names: - if name is stop_name: - column_data[name][i * nstops:(i + 1) * nstops] = \ - np.arange(0, nstops) - elif name is count_name: - column_data[name][i * nstops:(i + 1) * nstops] = \ - np.array(list(data[i]))[stop_col_num[j] : \ - -len(end_dtypes)] - else: - column_data[name][i * nstops:(i + 1) * nstops] = \ - data[name][i] - # Remove all zeros - column_data = column_data[column_data[count_name] != 0] - columnar_data.append(column_data) - self.Columnar_Object = Columnar_Data(columnar_data, archival=archival) - - def output_transect_data(self, filenames): - ''' - This function prints the data within self.columnar_data with the given - filenames. If self.columnar_data has not been filled, an error is - thrown. - - Parameters - ---------- - filenames : list - A list of filenames to which the data will be saved. Must be the - same length as self.columnar_data - - ''' - - assert len(filenames) == len(self.transect_data), "Number of filenames\ - must be the same as the number of datasets" - for i, data in self.transect_data: - ff.output_form(data, filenames[i]) - - -def remove_char(grid_list, char='\n'): - ''' - Removes the given char from the end of each cell in grid list - ''' - - for grid in grid_list: - for name in grid.dtype.names: - for i in xrange(len(grid[name])): - while grid[name][i][::-1].find(char) == 0: - grid[name][i] = grid[name][i][:-1] - - return grid_list - -def remove_white_spaces(grid_list): - ''' - Removes all of the white spaces from strings. - ''' - for grid in grid_list: - for name in grid.dtype.names: - for i in xrange(len(grid[name])): - grid[name][i] = ''.join(grid[name][i].split(' ')) - - return grid_list - -def replace_vals(filename, replace, delim=','): - ''' - Replace the values in filename with specified values in replace_values - Parameters ---------- - filename : string - Will be read into a rec array - - replace_values : tuple - First object is value to replace and second object is what to replace - it with - - - ''' - data = csv2rec(filename, delimiter=delim, missing=replace[0]) - for nm in data.dtype.names: - try: - # Missing float - isNaN = (np.isnan(data[nm])) - except: - isNaN = np.zeros(len(data[nm]), dtype=bool) - isBlank = np.array([it == '' for it in data[nm]]) - isMinusOne = (data[nm] == -1)# Missing int - # Missing other - isNone = np.array([i == None for i in data[nm]]) - ind = np.bitwise_or(isNaN, isBlank) - ind = np.bitwise_or(ind, isMinusOne) - ind = np.bitwise_or(ind, isNone) - data[nm][ind] = replace[1] - return data - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + data_path : str + A path to the dense data + non_spp_cols : list + A list of columns in the data that are not species columns + delimiter : str + The delimiter for the dense data. 
Default, "," + na_values : int, float, str, or list + Values to be labeled as NA. Default, "" + item_col : str + Name of the item column in the formatted data. Default, "spp" + count_col : str + Name of the count column in the formatted data. Default, "count" + nan_to_zero : bool + Set all nans to zero. Default, False + drop_na : bool + Drop all columns with nan in the dataset. Default, False + + Notes + ----- + Examples of Dense Data conversion + + + """ + + # Default arguments + base_data = pd.read_csv(data_path, sep=delimiter, + na_values=na_values) + + # Stack data in columnar form + indexed_data = base_data.set_index(keys=non_spp_cols) + columnar_data = indexed_data.stack() + columnar_data = columnar_data.reset_index() + + # Set nans to zero? + if nan_to_zero: + columnar_data[np.isnan(columnar_data)] = 0 + + # Drop nans? + if drop_na: + columnar_data = columnar_data.dropna(how="any") + + # Rename columns + num = len(non_spp_cols) + columnar_data.rename(columns={0: count_col, 'level_%i' % num: + item_col}, inplace=True) + + return columnar_data + + +def format_transect(): + """ + """ + pass + +def format_grid(): + """ + """ + pass From 2e9d5e460c02e85f357137971757fd2f555550a4 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Fri, 28 Mar 2014 09:55:31 -0700 Subject: [PATCH 183/343] Added formatting for init --- macroeco/misc/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/macroeco/misc/__init__.py b/macroeco/misc/__init__.py index 5e6a557..ba0aa84 100644 --- a/macroeco/misc/__init__.py +++ b/macroeco/misc/__init__.py @@ -1,2 +1,4 @@ from .misc import * from .rcparams import * +from format_data import (format_columnar, format_dense, format_grid, + format_transect) From 92f92fadaa5d1a2be4aa041741809c4bdccb4892 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Fri, 28 Mar 2014 09:58:38 -0700 Subject: [PATCH 184/343] Added empirical tests and renamed old_format_data --- macroeco/empirical/test_empirical.py | 599 +++++++++++++++ macroeco/misc/old_format_data.py | 1014 ++++++++++++++++++++++++++ 2 files changed, 1613 insertions(+) create mode 100644 macroeco/empirical/test_empirical.py create mode 100644 macroeco/misc/old_format_data.py diff --git a/macroeco/empirical/test_empirical.py b/macroeco/empirical/test_empirical.py new file mode 100644 index 0000000..b2f28d5 --- /dev/null +++ b/macroeco/empirical/test_empirical.py @@ -0,0 +1,599 @@ +''' +Unit tests for empirical.py +''' + +from __future__ import division + +from numpy.testing import (TestCase, assert_equal, assert_array_equal, + assert_almost_equal, assert_array_almost_equal, + assert_allclose, assert_, assert_raises) + +from macroeco.empirical import * +import numpy as np +import scipy.stats as stats +import numpy.testing as nt + +class TestEmpiricalCDF(TestCase): + """ Unittests for Empirical cdf """ + + def test_empirical_cdf_vs_R(self): + + #Test against R's ecdf function + + # Test Case 1 + test_data = [1, 1, 1, 1, 2, 3, 4, 5, 6, 6] + R_res = [.4, .4, .4, .4, .5, .6, .7, .8, 1, 1] + res = empirical_cdf(test_data) + assert_array_equal(R_res, res) + + # Test Case 2 + test_data = [3, 3, 3, 3] + R_res = [1, 1, 1, 1] + res = empirical_cdf(test_data) + assert_array_equal(R_res, res) + +# class TestPatch(unittest.TestCase): + +# def setUp(self): +# self.xyfile5 = open('xyfile5.csv','w') +# self.xyfile5.write('''spp_code, x, y, count +# grt, .1, .1, 2 +# grt, .1, .2, 1 +# grt, .1, .3, 1 +# rty, .1, .2, 1 +# rty, .2, .3, 2''') +# self.xyfile5.close() +# self.xymeta5 = {('x', 'maximum'): .2, ('x', 'minimum'): .1, ('x', +# 
'precision'): .1, ('x', 'type'): 'interval', ('y', 'maximum'): .3, +# ('y', 'minimum'): .1, ('y', 'precision'): .1, ('y', 'type'): 'interval', +# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, +# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', +# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', +# 'precision'): None, ('count', 'type'): 'ratio'} + +# self.pat1 = Patch('xyfile5.csv') +# # Line below sets metadata manually-no metadata file loaded +# self.pat1.data_table.meta = self.xymeta5 + +# self.xyfile6 = open('xyfile6.csv', 'w') +# self.xyfile6.write('''spp_code, x, y, count +# a, 0, 0, 1 +# b, 0, 0, 1 +# c, 0, 0, 0 +# d, 0, 0, 3 +# a, 0, 1, 0 +# b, 0, 1, 4 +# c, 0, 1, 0 +# d, 0, 1, 1 +# a, 1, 0, 1 +# b, 1, 0, 0 +# c, 1, 0, 3 +# d, 1, 0, 1 +# a, 1, 1, 0 +# b, 1, 1, 1 +# c, 1, 1, 3 +# d, 1, 1, 1''') +# self.xyfile6.close() +# self.xymeta6 = {('x', 'maximum'): 1, ('x', 'minimum'): 0, ('x', +# 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 1, +# ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', +# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, +# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', +# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', +# 'precision'): None, ('count', 'type'): 'ratio'} +# self.pat2 = Patch('xyfile6.csv') +# self.pat2.data_table.meta = self.xymeta6 + +# self.xyfile7 = open('xyfile7.csv', 'w') +# self.xyfile7.write('''spp_code, x, y, count +# tery, 1, 1, 1 +# 1, 1, 1, 1 +# 2, 1, 1, 0 +# 3, 1, 1, 3 +# 0, 1, 2, 0 +# 1, 1, 2, 4 +# 2, 1, 2, 0 +# tery, 1, 2, 1 +# 0, 2, 1, 1 +# 1, 2, 1, 0 +# 2, 2, 1, 3 +# 3, 2, 1, 1 +# tery, 2, 2, 0 +# 1, 2, 2, 1 +# 2, 2, 2, 3 +# 3, 2, 2, 1''') +# self.xyfile7.close() +# self.xymeta7 = {('x', 'maximum'): 2, ('x', 'minimum'): 1, ('x', +# 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 2, +# ('y', 'minimum'): 1, ('y', 'precision'): 1, ('y', 'type'): 'interval', +# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, +# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', +# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', +# 'precision'): None, ('count', 'type'): 'ratio'} +# self.pat3 = Patch('xyfile7.csv') +# self.pat3.data_table.meta = self.xymeta7 + +# self.xyfile8 = open('xyfile8.csv', 'w') +# self.xyfile8.write('''spp_code, x, y, count +# 0, 0, 0, 1 +# 1, 0, 0, 1 +# 2, 0, 0, 0 +# 3, 0, 0, 3 +# 0, 0, 1, 0 +# 1, 0, 1, 4 +# 2, 0, 1, 0 +# 3, 0, 1, 1 +# 0, 1, 0, 1 +# 1, 1, 0, 0 +# 2, 1, 0, 3 +# 3, 1, 0, 1 +# 0, 1, 1, 0 +# 1, 1, 1, 1 +# 2, 1, 1, 3 +# 3, 1, 1, 1 +# 0, 2, 0, 0 +# 1, 2, 0, 0 +# 2, 2, 0, 2 +# 3, 2, 0, 4 +# 0, 2, 1, 0 +# 1, 2, 1, 0 +# 2, 2, 1, 0 +# 3, 2, 1, 1''') +# self.xyfile8.close() +# self.xymeta8 = {('x', 'maximum'): 2, ('x', 'minimum'): 0, ('x', +# 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 1, +# ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', +# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, +# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', +# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', +# 'precision'): None, ('count', 'type'): 'ratio'} +# self.pat4 = Patch('xyfile8.csv') +# self.pat4.data_table.meta = self.xymeta8 +# self.xyfile9 = open('xyfile9.csv','w') +# self.xyfile9.write('''spp_code, x, y, count, energy, mass +# grt, .1, .1, 2, 1, 34 +# grt, .1, .2, 1, 2, 12 +# grt, .1, .3, 1, 3, 23 +# rty, .1, .2, 1, 4, 45 +# rty, .2, .3, 1, 5, 
110''') +# self.xyfile9.close() +# self.xymeta9 = {('x', 'maximum'): .2, ('x', 'minimum'): .1, ('x', +# 'precision'): .1, ('x', 'type'): 'interval', ('y', 'maximum'): .3, +# ('y', 'minimum'): .1, ('y', 'precision'): .1, ('y', 'type'): 'interval', +# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, +# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', +# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', +# 'precision'): None, ('count', 'type'): 'ratio'} + +# self.pat5 = Patch('xyfile9.csv') +# self.pat5.data_table.meta = self.xymeta9 +# self.xyfile10 = open('xyfile10.csv', 'w') +# self.xyfile10.write('''spp_code, x, y, count +# a, 0, 0, 1 +# b, 0, 0, 1 +# d, 0, 0, 3 +# b, 0, 1, 4 +# d, 0, 1, 1 +# a, 1, 0, 1 +# c, 1, 0, 3 +# d, 1, 0, 1 +# b, 1, 1, 1 +# c, 1, 1, 3 +# d, 1, 1, 1''') +# self.xyfile10.close() +# self.xymeta10 = {('x', 'maximum'): 1, ('x', 'minimum'): 0, ('x', +# 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 1, +# ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', +# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, +# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', +# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', +# 'precision'): None, ('count', 'type'): 'ratio'} +# self.pat6 = Patch('xyfile10.csv') +# self.pat6.data_table.meta = self.xymeta10 +# self.xyfile11 = open('xyfile11.csv', 'w') +# self.xyfile11.write('''spp_code, x, y, count, reptile +# a, 0, 0, 1, lizard +# b, 0, 0, 1, lizard +# d, 0, 0, 3, snake +# b, 0, 1, 4, lizard +# d, 0, 1, 1, turtle +# a, 1, 0, 1, snake +# c, 1, 0, 3, lizard +# d, 1, 0, 1, snake +# b, 1, 1, 1, tuatara +# c, 1, 1, 3, turtle +# d, 1, 1, 1, snake''') +# self.xyfile11.close() +# self.xymeta11 = {('x', 'maximum'): 1, ('x', 'minimum'): 0, ('x', +# 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 1, +# ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', +# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, +# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', +# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', +# 'precision'): None, ('count', 'type'): 'ratio', ('reptile', 'maximum') +# : None, ('reptile', 'minimum') : None, ('reptile', 'precision'):None, +# ('reptile', 'type') : 'ordinal'} +# self.pat7 = Patch('xyfile11.csv') +# self.pat7.data_table.meta = self.xymeta11 + +# self.xyfile12 = open('xyfile12.csv', 'w') +# self.xyfile12.write('''spp_code, x, y, count +# 3, 0, 0, 3 +# 3, 0, 1, 1 +# 2, 0, 2, 3 +# 1, 0, 3, 8 +# 3, 1, 0, 1 +# 3, 1, 1, 1 +# 0, 1, 2, 5 +# 3, 1, 3, 1 +# 2, 2, 0, 1 +# 1, 2, 1, 3 +# 1, 2, 2, 6 +# 0, 2, 3, 1 +# 1, 3, 0, 9 +# 2, 3, 1, 1 +# 0, 3, 2, 3 +# 3, 3, 3, 1''') +# self.xyfile12.close() +# self.xymeta12 = {('x', 'maximum'): 3, ('x', 'minimum'): 0, ('x', +# 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 3, +# ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', +# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, +# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', +# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', +# 'precision'): None, ('count', 'type'): 'ratio'} +# self.pat8 = Patch('xyfile12.csv') +# self.pat8.data_table.meta = self.xymeta12 + +# # Data file with three count colums, unique row for each species +# self.xyfile13 = open('xyfile13.csv', 'w') +# self.xyfile13.write('''spp_code, order, plot1, plot2, plot3 +# a, pred, 0, 0, 0 +# b, 
pred, 0, 0, 1 +# c, pred, 0, 1, 0 +# d, pred, 0, 2, 3 +# e, scav, 0, 1, 0 +# f, scav, 0, 1, 4''') +# self.xyfile13.close() +# self.xymeta13 = {('spp_code', 'maximum'): None, +# ('spp_code', 'minimum'): None, +# ('spp_code', 'precision'): None, +# ('spp_code', 'type'): 'ordinal', +# ('order', 'maximum'): None, +# ('order', 'minimum'): None, +# ('order', 'precision'): None, +# ('order', 'type'): 'ordinal', +# ('plot1', 'maximum'): None, +# ('plot1', 'minimum'): None, +# ('plot1', 'precision'): None, +# ('plot1', 'type'): 'ratio', +# ('plot2', 'maximum'): None, +# ('plot2', 'minimum'): None, +# ('plot2', 'precision'): None, +# ('plot2', 'type'): 'ratio', +# ('plot3', 'maximum'): None, +# ('plot3', 'minimum'): None, +# ('plot3', 'precision'): None, +# ('plot3', 'type'): 'ratio'} +# self.pat9 = Patch('xyfile13.csv') +# self.pat9.data_table.meta = self.xymeta13 + + + + +# def tearDown(self): +# os.remove('xyfile5.csv') +# os.remove('xyfile6.csv') +# os.remove('xyfile7.csv') +# os.remove('xyfile8.csv') +# os.remove('xyfile9.csv') +# os.remove('xyfile10.csv') +# os.remove('xyfile11.csv') +# os.remove('xyfile12.csv') +# os.remove('xyfile13.csv') + +# # +# # init and set_attributes +# # + +# def test_patch_init(self): + +# # Test entire table is loaded +# self.assertTrue(len(self.pat1.data_table.table) == 5) +# self.assertTrue(len(self.pat2.data_table.table) == 16) + +# # Test that subsetting works +# pat = Patch('xyfile6.csv', {'spp_code': [('!=','a'), ('!=', 'b'), +# ('!=','c')]}) +# self.assertTrue(np.all(pat.data_table.table['spp_code'] == 'd')) +# pat = Patch('xyfile7.csv', {'spp_code': ('==', "tery")}) +# self.assertTrue(sum(pat.data_table.table['count']) == 2) + +# # Testing that metadata was set correctly +# self.assertTrue(self.pat1.data_table.meta[('x', 'maximum')] == .2) + +# def test_sad(self): + +# # Test correct result with 'whole' and one division +# sad = self.pat1.sad({'spp_code': 'species', 'count': 'count', +# 'x': 1}) +# self.assertTrue(np.array_equal(sad[0][1], np.array([4,3]))) +# sad = self.pat1.sad({'spp_code': 'species', 'count': 'count', +# 'x': 'whole'}) +# self.assertTrue(np.array_equal(sad[0][1], np.array([4,3]))) +# sad = self.pat4.sad({'spp_code': 'species', 'count' :'count', 'x': 1}) +# self.assertTrue(np.array_equal(sad[0][2], np.array([0,1,2,3]))) + +# # Test correct result with other divisions +# sad = self.pat4.sad({'spp_code': 'species', 'count': 'count', 'x': 3, +# 'y': 2}) +# self.assertTrue(np.array_equal(sad[-1][1], np.array([0,0,0,1]))) + +# # Test that 'whole' and ignore give the same result +# sad1 = self.pat4.sad({'spp_code': 'species', 'count': 'count'}) +# sad2 = self.pat4.sad({'spp_code': 'species', 'count': 'count', 'x' : +# 'whole'}) +# self.assertTrue(np.array_equal(sad1[0][1], sad2[0][1])) + +# # Test that 'split' keyword returns the correct results +# sad = self.pat5.sad({'spp_code' :'species', 'energy':'split', 'count' +# : 'count'}) +# self.assertTrue(len(sad) == 5) +# self.assertTrue(np.array_equal(sad[0][1], np.array([2,0]))) + +# # Test split and clean on numeric column +# sad = self.pat5.sad({'spp_code' :'species', 'energy':'split', 'count' +# : 'count'}, clean=True) +# self.assertTrue(len(sad) == 5) +# self.assertTrue(np.array_equal(sad[0][1], np.array([2]))) + +# # Test that cleaning sad and split works on string +# sad = self.pat7.sad({'spp_code' : 'species', 'count' : 'count', +# 'reptile' : 'split'}, clean=True) +# self.assertTrue(len(sad) == 4) +# self.assertTrue(np.array_equal(sad[0][1], np.array([1,5,3]))) +# 
self.assertTrue(np.array_equal(sad[2][1], np.array([1]))) +# self.assertTrue(sad[2][2][0] == 'b') + +# def test_parse_criteria(self): + +# # Checking parse returns what we would expect +# pars = self.pat4.parse_criteria({'spp_code': 'species', 'count': 'count', +# 'x': 1}) +# self.assertTrue(pars[1] == 'spp_code') +# self.assertTrue(pars[2] == 'count') + +# # Test that energy, mass and count col are None +# pars = self.pat4.parse_criteria({'spp_code': 'species', +# 'y': 'whole'}) +# self.assertTrue((pars[2] == None) and (pars[3] == None) and (pars[4] == +# None)) + +# # If species is not specified correctly an error is thrown +# self.assertRaises(ValueError, self.pat3.parse_criteria, {'spp_col' +# :'species'}) +# # Make sure if count is not passed, no error is thrown +# self.pat3.parse_criteria({'spp_code': 'species'}) + +# # Check energy and mass returns +# pars = self.pat5.parse_criteria({'spp_code': 'species', 'count': +# 'count', 'energy': 'energy'}) + +# self.assertTrue(pars[3] == 'energy') +# self.assertTrue(pars[4] == None) + +# # Check that combinations in empty dict if no criteria given +# pars = self.pat5.parse_criteria({'spp_code': 'species', 'count': +# 'count'}) +# self.assertTrue(pars[5] == [{}]) + +# # TODO: Test that error is thrown if step < prec + +# def test_sar(self): + +# # Checking that sar function returns correct S0 for full plot +# sar = self.pat3.sar(('x', 'y'), [(1,1)], {'spp_code': 'species', +# 'count': 'count'}) +# self.assertTrue(sar[0]['items'][0] == 5) + +# # Checking for correct result for sar +# sar = self.pat3.sar(('x', 'y'), [(1,1), (2,2)], {'spp_code': 'species', +# 'count': 'count'}) +# self.assertTrue(np.array_equal(sar[1][1], np.array([3,3,2,3]))) +# sar = self.pat4.sar(('x', 'y'), [(1,1), (1,2), (3,2)], {'spp_code': +# 'species', 'count': 'count'}, form='sar') +# self.assertTrue(np.array_equal(sar[1][2], np.array([3,3,2,2,3,1]))) + +# # Checking for correct result for ear +# ear = self.pat3.sar(('x', 'y'), [(1,1), (2,2)], {'spp_code': 'species', +# 'count': 'count'}, form='ear') +# self.assertTrue(np.array_equal(ear[1][1], np.array([0,1,0,0]))) + +# # Test that returned areas are correct +# sar = self.pat1.sar(('x', 'y'), [(1,1)], {'spp_code': 'species', +# 'count': 'count'}) +# self.assertTrue(np.round(sar[0]['area'][0], decimals=2) == 0.06) +# self.assertTrue(sar[0]['items'][0] == 2) + +# def test_universal_sar(self): + +# # Check that it returns the right length +# criteria = {'spp_code': 'species', 'count' : 'count'} +# div_cols = ('x', 'y') +# vals = self.pat8.universal_sar(div_cols, [(1,1), (1,2), (2,2), (2,4), +# (4,4)], criteria) +# self.assertTrue(len(vals) == 3) + +# # If (1,1) is not passed in it should have a length of zero +# vals = self.pat8.universal_sar(div_cols, [(1,2), (2,2)], criteria) +# self.assertTrue(len(vals) == 0) + +# # If (1,1) is not passed in but include_full == True should have len +# # equal to 1 +# vals = self.pat8.universal_sar(div_cols, [(1,2), (2,2), (2,4)], +# criteria, +# include_full=True) +# self.assertTrue(len(vals) == 2) + +# # Test that I get the correct z-value back +# vals = self.pat8.universal_sar(div_cols, [(1,1), (1,2), (2,2)], +# criteria) +# self.assertTrue(np.round(vals['z'][0], decimals=4) == 0.3390) + +# # If I pass in something other than a halving I should still get +# # something back +# vals = self.pat8.universal_sar(div_cols, [(1,1), (2,2), (2,4), (4,4)], +# criteria) +# self.assertTrue(len(vals) == 2) + +# def test_comm_sep(self): + +# # Create result recarray +# comm = 
self.pat9.comm_sep({'plot1': (0,0), 'plot2': (0,1), +# 'plot3': (3,4)}, +# {'spp_code': 'species', 'count': 'count'}) + +# # Create result recarray with dec degree locs +# comm_decdeg = self.pat9.comm_sep({'plot1': (9.1,79.0), +# 'plot2': (9.2,79.5), 'plot3': (12.7,50)}, +# {'spp_code': 'species', 'count': 'count'}, +# loc_unit='decdeg') + +# # Check distances +# dist_sort = np.sort(comm['dist']) +# np.testing.assert_array_almost_equal(dist_sort, np.array((1,4.242,5)), +# 3) + +# # Check distances dec degree +# # TODO: Find exact third party comparison formula - formulas online use +# # different radii, etc. and give approx same answer +# dist_sort = np.sort(comm_decdeg['dist']) +# #np.testing.assert_array_almost_equal(dist_sort, +# # np.array((56.058,3193.507, +# # 3245.820)), 3) + +# # Check species in each plot +# spp_sort = np.sort(np.array(list(comm['spp-a']) + list(comm['spp-b']))) +# np.testing.assert_array_equal(spp_sort, np.array((0,0,3,3,4,4))) + +# # Check Sorensen - 2 zeros from empty plot1 +# sor_sort = np.sort(comm['sorensen']) +# np.testing.assert_array_almost_equal(sor_sort, +# np.array((0,0,0.571428571)), 5) + +# # Check Jaccard - 2 zeros from empty plot1 +# jac_sort = np.sort(comm['jaccard']) +# np.testing.assert_array_almost_equal(jac_sort, np.array((0,0,0.4)), 5) + +# def test_o_ring(self): + +# # Check standard case, no min max, no edge correction, no criteria +# # Tests that distances and repeats for count col are correct +# result_list = self.pat1.o_ring(('x','y'), [0,.11,.2], +# {'spp_code': 'species', 'count': 'count'}) + +# np.testing.assert_array_equal(result_list[0][2][0], np.array((8,4))) +# np.testing.assert_array_equal(result_list[0][2][1], np.array((2,4))) + +# # Check standard case, no min max, no edge correction, with division +# result_list = self.pat1.o_ring(('x','y'), [0,.11,.2], +# {'spp_code': 'species', 'count': 'count', +# 'y': 2}) + +# # - First half of y, both species +# np.testing.assert_array_equal(result_list[0][2][0], np.array((6,0))) +# np.testing.assert_array_equal(result_list[0][2][1], np.array((0,0))) + +# # - Second half of y, both species +# np.testing.assert_array_equal(result_list[1][2][0], np.array((0,0))) +# np.testing.assert_array_equal(result_list[1][2][1], np.array((2,0))) + +# # Check edge correction - check only first species +# # Almost equal required due to float division +# result_list = self.pat1.o_ring(('x','y'), [0,.05,.1], +# {'spp_code': 'species', 'count': 'count'}, +# edge_correct=True) +# np.testing.assert_array_almost_equal(result_list[0][2][0], +# np.array((8,18))) + +# # Check density - check only second species +# print 'here ' +# result_list = self.pat1.o_ring(('x','y'), [0,.05,.1], +# {'spp_code': 'species', 'count': 'count'}, +# density=True) +# np.testing.assert_array_almost_equal(result_list[0][2][1], +# np.array((1358.12218105,0))) + +# def test_ssad(self): + +# # Check that ssad does not lose any individuals +# ssad = self.pat2.ssad({'spp_code': 'species', 'count': 'count'}) +# sad = self.pat2.sad({'spp_code': 'species', 'count': 'count'}) +# sum_ssad = np.array([sum(val) for val in ssad[1].itervalues()]) +# self.assertTrue(sum(sad[0][1]) == sum(sum_ssad)) + +# ssad = self.pat6.ssad({'spp_code': 'species', 'count': 'count'}) +# sad = self.pat6.sad({'spp_code': 'species', 'count': 'count'}) +# sum_ssad = np.array([sum(val) for val in ssad[1].itervalues()]) +# self.assertTrue(sum(sad[0][1]) == sum(sum_ssad)) + +# # Manual checks of correct ssad +# ssad = self.pat2.ssad({'spp_code': 'species', 'count': 
'count', 'x': +# 2, 'y': 2}) +# self.assertTrue(set(ssad[1]['a']) == {1, 0, 1, 0}) +# self.assertTrue(set(ssad[1]['b']) == {1, 4, 0, 1}) +# self.assertTrue(set(ssad[1]['c']) == {0, 0, 3, 3}) +# self.assertTrue(set(ssad[1]['d']) == {3, 1, 1, 1}) + +# ssad = self.pat6.ssad({'spp_code': 'species', 'count': 'count', 'x' : +# 2, 'y': 2}) +# self.assertTrue(set(ssad[1]['a']) == {1, 0, 1, 0}) +# self.assertTrue(set(ssad[1]['b']) == {1, 4, 0, 1}) +# self.assertTrue(set(ssad[1]['c']) == {0, 0, 3, 3}) +# self.assertTrue(set(ssad[1]['d']) == {3, 1, 1, 1}) + +# def test_ied(self): + +# # Test correct length of result +# eng = self.pat5.ied({'spp_code': 'species', 'count': 'count', +# 'energy': 'energy'}) +# self.assertTrue(len(eng[0][1]) == 6) + +# # Test error if energy column is missing +# self.assertRaises(ValueError, self.pat5.ied, +# {'spp_code': 'species', 'count': 'count'}) + +# # Test normalize is working +# eng = self.pat5.ied({'spp_code': 'species', 'count': 'count', +# 'energy': 'energy', 'x': 2}) +# self.assertTrue(np.array_equal(eng[1][1], np.array([1]))) +# self.assertTrue(len(eng[0][1]) == 5) + +# # Test mass column and normalize +# eng = self.pat5.ied({'spp_code': 'species', 'count': 'count', +# 'mass' : 'mass'}, exponent=1, normalize=False) +# self.assertTrue(np.array_equal(eng[0][1], np.array([17,17,12,23,45, +# 110]))) + +# # Test that energy overrides mass +# eng = self.pat5.ied({'spp_code': 'species', 'count': 'count', +# 'mass' : 'mass', 'energy' : 'energy'}, normalize=False) +# self.assertTrue(np.array_equal(eng[0][1], np.array([.5,.5,2,3,4,5]))) + +# def test_sed(self): + +# # Check correct result +# eng = self.pat5.sed({'spp_code': 'species', 'count': 'count', +# 'energy': 'energy'}) +# self.assertTrue(np.array_equal(eng[0][1]['grt'], +# np.array([1,1,4,6]))) +# self.assertTrue(np.array_equal(eng[0][1]['rty'], +# np.array([8,10]))) + +# eng = self.pat5.sed({'spp_code': 'species', 'count': 'count', +# 'energy': 'energy', 'x': 2}) +# self.assertTrue(np.array_equal(eng[1][1]['rty'], np.array([1]))) +# self.assertTrue(len(eng[1][1]) == 2) + +# if __name__ == "__main__": +# unittest.main() diff --git a/macroeco/misc/old_format_data.py b/macroeco/misc/old_format_data.py new file mode 100644 index 0000000..de9b9e6 --- /dev/null +++ b/macroeco/misc/old_format_data.py @@ -0,0 +1,1014 @@ +#!/usr/bin/python + +'''This module contains 4 separate classes, each built to handle a +canonical data type + +This module provides the user with some formatting functions but does provide +the user with all formatting functions that may be required. This module is +not a substitute for thorough examination of ones data to remove irrelevant +data''' + +import numpy as np +from matplotlib.mlab import csv2rec +import form_func as ff +from numpy.lib.recfunctions import drop_fields +import csv + + +class Columnar_Data: + ''' + This is the data form that the macroeco software package wants the data + file in. All other canonical data sets are converted to columnar data and + then turned into Columnar_Data objects. + + Examples of columnar data include BCIS, LUQU, and COCO + + Multiple data files must have same format if they are to be merged + + ''' + + def __init__(self, datalist, delimiter=',', missingd=None,\ + delete_missing=False, archival=True): + ''' + This __init__ method takes in data and stores it in rec_arrays. + If specified, it will located missing data points and remove them + from the data set. + + Parameters + ---------- + datalist : string, list of strings, or list of ndarrays. 
+ Data filenames or list of data arrays + + delimiter : string + The file delimiter. Default is ',' + + missingd : dict + Dictionary mapping munged column names to field values which + signify that the field does not contain actual data and should be + masked, e.g. '0000-00-00' or 'unused'. The missing value must be + represented as a string. + + delete_missing : bool + If True, deletes all of the missing values. If False, only deletes + the NaNs from the data. + + archival : bool + If True, a copy of self.columnar_data is made and stored in + self.columnar_archival. If dataset is very large, set to False. + + Note + ---- + If column type is integer, missing values are set to -1. If column + type is float, missing values are set to NaN. If column type is + string, missing values are set to ''. If column type is object, + missing values are set to None. + + ''' + if type(datalist) == str: + datalist = [datalist] + + if np.all(np.array([type(x) == str for x in datalist])): + self.columnar_data = [] + self.data_names = [] + for file_name in datalist: + self.columnar_data.append(csv2rec(file_name, delimiter=delimiter,\ + missingd=missingd)) + self.data_names.append(file_name) + if missingd != None: + if delete_missing: + trun_data = [] + for data in self.columnar_data: + for key in missingd.iterkeys(): + try: + # Missing float + notNaN = (False == np.isnan(data[key])) + except: + notNaN = np.ones(len(data[key]), dtype=bool) + notBlank = np.array([it != '' for it in data[key]]) + notMinusOne = (data[key] != -1)# Missing int + # Missing other + notNone = np.array([i != None for i in data[key]]) + ind = np.bitwise_and(notNaN, notBlank) + ind = np.bitwise_and(ind, notMinusOne) + ind = np.bitwise_and(ind, notNone) + data = data[ind] + trun_data.append(data) + self.columnar_data = trun_data + else: + trun_data = [] + for data in self.columnar_data: + for key in missingd.iterkeys(): + try: + notNaN = (False == np.isnan(data[key])) + except: + notNaN = np.ones(len(data[key]), dtype=bool) + data = data[notNaN] + trun_data.append(data) + self.columnar_data = trun_data + elif np.all(np.array([type(x) == np.ndarray for x in datalist])): + self.columnar_data = datalist + + if archival: + self.columnar_archival = [np.copy(data) for data in + self.columnar_data] + else: + self.columnar_archival = [] + + def reset_columnar_data(self): + ''' + Resets self.columnar_data to self.columnar_archival + + Need to be careful about excessive memory usage! + ''' + if len(self.columnar_archival) == 0: + raise ValueError("The self.columnar_archival attribute of this %s" + % (self.__class__.__name__) + " object has not" + + " been initialized") + else: + self.columnar_data = [np.copy(data) for data in + self.columnar_archival] + + def subset_data(self, subset={}): + ''' + Subset any given column of the data + + Parameters + ---------- + subset : dict + Dictionary of permanent subset to data, {'column_name': + 'condition'}, which will limit all analysis to records in which + column_name meets the condition, ie, {'year': ('==', 2005), 'x': + [('>', 20), ('<', 40)]} restricts analysis to year 2005 and x + values between 20 and 40. These conditions can also be passed to + the individual methods, but subsetting the data table up front may + save analysis time. 
Subsetting on a string would look something + like {'name' : [('==', 'John'), ('==', 'Harry')]} + ''' + + + if subset != {}: + # Format column names + subset = ff.format_dict_names(subset) + + sub_data = [] + for data in self.columnar_data: + valid = np.ones(len(data), dtype=bool) + + for key, value in subset.iteritems(): + if type(value) is not type(['a']): # Make all iterables + value = [value] + + # Merge tuples into a string + merged_values = [] + for val in value: + try: # check if val[1] is a string + eval(str(val[1])) + merged_values.append(val[0] + str(val[1])) + except: + merged_values.append(val[0] + "'" + val[1] + "'") + + for this_value in merged_values: + try: + this_valid = eval("data[key]" + this_value) + valid = np.logical_and(valid, this_valid) + except ValueError: #If key can't be found do nothing + pass + + sub_data.append(data[valid]) + + self.columnar_data = sub_data + + def split_up_data_by_field(self, split_columns=None): + ''' + This function will take in the split-columns list and and split the + data into separate arrays based on the list. For example, if one were + to pass in dbh1, dbh2, dbh3 three copies of the data would be + made, each being identical except that each would only contain one of + the instances of dbh. One could also pass [(dbh1, recr1), (dbh2, recr2), + (dbh3, recr3)]. All other fields in split_columns will be excluded + other than the fields within the tuple under consideration. + + Parameters + ---------- + split_columns : list + a list of tuples specifying the columns by which to split the array + + Notes + ----- + Saves the split array as self.columnar_data. + + ''' + #Note: If they enter the wrong column name nothing will be removed + #Should I error check for this? + if split_columns != None: + # Check if split_columns is a list of strings. If so, change it + # into a list of tuples + split_columns = [(s,) if type(s) == str else tuple(s) for s in + split_columns] + + # Format the names in each tuple + split_columns = [tuple(ff.format_headers(nms)) for nms in + split_columns] + + split_data = [] + given_col_names = [] + for tup in split_columns: + for name in tup: + given_col_names.append(name) + given_col_names = np.array(given_col_names) + + + for data in self.columnar_data: + for tup in split_columns: + ind = np.ones(len(given_col_names), dtype=bool) + for name in tup: + ind = np.bitwise_and((name != given_col_names), ind) + remove_names = given_col_names[ind] + split_data.append(drop_fields(data, list(remove_names))) + self.columnar_data = split_data + + def change_column_names(self, change=None, changed_to=None): + ''' + This function takes a list of column names to be changed and a name + that they should be changed to + + Parameters + ---------- + change : list of tuples or strings + Each tuple or string contains column names. All the column names in + the first tuple will be changed to the first element in the + changed_to list and so on. + changed_to : list + A list of strings that contain the names that the columns in change + will be changed to. + + Notes + ----- + This function is useful if you would like to merge self.columnar_data + but the dtype.names are different. 
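As a rough usage sketch only (the file names and column names below are hypothetical, not taken from any real dataset), two censuses whose diameter column is named differently could be reconciled before merging roughly like this:

    cd = Columnar_Data(['census_a.csv', 'census_b.csv'])
    # Rename both spellings to a single shared header
    cd.change_column_names(change=[('dbh', 'diameter')], changed_to=['stem_diam'])
    cd.merge_data()  # succeeds only if the remaining dtypes also agree
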
+ + ''' + if change != None and changed_to != None: + if len(change) != len(changed_to): + raise ValueError('Length of params change and changed_to must' + + ' be equal') + # Convert to tuples if just received strings + change = [(x,) if type(x) == str else tuple(x) for x in change] + + # Format the names in each tuple + change = [tuple(ff.format_headers(nms)) for nms in change] + + for data in self.columnar_data: + column_names = np.array(data.dtype.names) + for i, name_tup in enumerate(change): + for name in name_tup: + find = np.where((name == column_names))[0] + if len(find) != 0: + max_len = np.max([len(x) for x in column_names]) + if max_len < len(changed_to[i]): + column_names = column_names.astype('S' + + str(len(changed_to[i]))) + column_names[find[0]] = changed_to[i] + data.dtype.names = tuple(column_names) + + def add_fields_to_data_list(self, fields_values=None, descr='S20'): + ''' + This functions adds given fields and values to the data list. If the + length of the value for a given keyword in one, it will be broadcast to + the length of self.columnar_data. Else an error will be thrown. + + Parameters + ---------- + fields_values : dict + dictionary with keyword being the the field name to be added and + the value being a tuple with length self.columnar_data specifying + the values to be added to each field in each data set. + descr : a single data type or a dictionary + A single value will be broadcast to appropriate length. The + dictionary must have the same keywords as fields_values and must be + the same length. Each keyword should lookup a dtype. + ''' + if fields_values != None: + self.columnar_data = ff.add_data_fields(self.columnar_data, + fields_values, descr=descr) + + def remove_columns(self, col_names=None): + ''' + This function will remove the all the columns within with names in + col_names from all the datasets in self.columnar_data. + + Parameters + ---------- + col_names : string or list + The name or names of columns to be removed + + ''' + + if col_names != None: + + if type(col_names) == str: + col_names = [col_names] + else: + col_names = list(col_names) + + # Format column names + col_names = ff.format_headers(col_names) + + removed_data = [] + for data in self.columnar_data: + removed_data.append(drop_fields(data, col_names)) + self.columnar_data = removed_data + + def fractionate_data(self, wid_len=None, step=None, col_names=None, + wid_len_old=None, min_old=None, step_old=None): + ''' + This function converts grid numbers to length measurements in + self.columnar_data + + Parameters + ---------- + wid_len : tuple + A tuple containing the the absolute length of the columns being + converted + step : tuple + The desierd precision (step or stride length) of each grid. The + first element in the step tuple corresponds with the first element + in the wid_len tuple and so on. + col_names : array-like object + An array-like object of strings giving the names of the columns + that will be fractionated + wid_len_old : tuple or None + If None, it assumes that a np.unique on datayears[col_name[i]] + gives a array that is the same length as np.arange(0, + wid_len_new[i], step=step_new[i]). If it doesn't, an error will be + thrown. If not None, expects the old maximum length for the given + columns. + min_old : tuple or None + Same as wid_len_old but the old minimum value for each given column + step_old : tuple or None + Same as wid_len_old but the old step (or stride length/spacing) for + each given column. 
+ + ''' + if wid_len != None and step != None and col_names != None: + self.columnar_data = ff.fractionate(self.columnar_data, wid_len, step, + col_names, wid_len_old=wid_len_old, + min_old=min_old, step_old=step_old) + + + def merge_data(self): + ''' + This function concatenates the data files in data_list. The dtypes of + the data in data_list must be identical or this function will fail. + ''' + + self.merged_data = ff.merge_formatted(self.columnar_data) + + def output_merged_data(self, filename): + ''' + This function merges self.columnar_data and outputs the merged data. + + Parameters + ---------- + filename : string + The filename to be output + + ''' + #Merge data in case it has not been done + self.merge_data() + ff.output_form(self.merged_data, filename) + + def output_columnar_data(self, filenames): + ''' + This function outputs the self.columnar_data + + Parameters + ---------- + filenames : list + A list of filenames + + ''' + assert len(filenames) == len(self.columnar_data), "Number of " + \ + "filenames must be the same as the number of datasets" + for i, name in enumerate(filenames): + ff.output_form(self.columnar_data[i], name) + +class Grid_Data: + '''This class handles data should look like the EarthFlow data after a + census. It is a grid with species abundance data in each cell. + ex. + ARTDRA - 6 + GERTYR - 8 + + ''' + + def __init__(self, filenames, archival=True, spp_sep='\n'): + ''' + Pass in the file name(s) of the grid data that you want converted and + the number of columns in each grid. + + Parameters + ---------- + + filenames : str or list of strings + A filename or list of filenames + + archival : bool + If True, a copy of self.grid_data is made and stored in + self.grid_archival. If dataset is very large, set to False. + + ''' + #NOTE: Handle missing data!!!! + + if type(filenames) == str: + filenames = [filenames] + + assert np.all(np.array([name.split('.')[-1] for name in filenames]) ==\ + 'csv'), "Files must be csv" + + self.grid_data = [] + self.cols = [] + self.rows =[] + + for i, name in enumerate(filenames): + # Sometimes csv.reader reads an extra column so you have to read to + # whole file. Seems stupid to read in the file twice but oh well... + with open(name, 'rb') as csvreader: + reader = csv.reader(csvreader) + rows = [row for row in reader] + min_len = np.min([len(row) for row in rows]) + self.cols.append(min_len) + + self.grid_data.append(csv2rec(name, names=list(np.arange(0,\ + self.cols[i]).astype('S10')))) + self.rows.append(len(self.grid_data[i])) + + #Remove all '\n' from the end of each cell in grid + #Not technically necessary but just being clean + self.grid_data = remove_char(self.grid_data, char=spp_sep) + self.grid_data = remove_white_spaces(self.grid_data) + + if archival == True: + self.grid_archival = [np.copy(data) for data in self.grid_data] + else: + self.grid_archival = [] + + def reset_grid_data(self): + ''' + Resets self.grid_data to self.archival_data + + Need to be careful about excessive memory usage! + ''' + + if len(self.grid_archival) == 0: + raise ValueError("The self.grid_archival attribute of this %s" + % (self.__class__.__name__) + " object has not" + + " been initialized") + else: + self.grid_data = [np.copy(data) for data in self.grid_archival] + + def truncate_grid_cells(self, symbol=None): + ''' + This function will look at each cell in grid list and truncated the + string within the cell at AND after the first instance of a given + symbol. 
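As a minimal illustration of that truncation on a single grid cell (the cell text and the symbol are invented for this example):

    cell = 'ARTDRA - 6*planted'
    ind = cell.find('*')
    if ind != -1:
        cell = cell[:ind]  # leaves 'ARTDRA - 6'
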
+ + Parameters + ---------- + symbol : string or list of strings + The symbol at which to being truncation + + Notes + ----- + symbol is a keyword argument because format_grid_data script gives the + option to run every method. + + ''' + if symbol != None: + + if type(symbol) == str: + symbol = [symbol] + else: + symbol = list(symbol) + + for i in xrange(len(self.grid_data)): + for nm in self.grid_data[i].dtype.names: + for j in xrange(len(self.grid_data[i][nm])): + for sym in symbol: + ind = self.grid_data[i][nm][j].find(sym) + if ind != -1: + self.grid_data[i][nm][j] = \ + self.grid_data[i][nm][j][:ind] + + self.grid_data = remove_char(self.grid_data) + + # List of remove replace tuples? + def remove_and_replace(self, remove=None, replace=''): + ''' + Removes a string from a grid cell and replaces it with another one + + Paramters + --------- + remove : string + String to be removed + replace : string + String to replace removed string + + ''' + + if remove != None and replace != None: + for i in xrange(len(self.grid_data)): + for nm in self.grid_data[i].dtype.names: + for j in xrange(len(self.grid_data[i][nm])): + self.grid_data[i][nm][j] =\ + self.grid_data[i][nm][j].replace(remove, replace) + + def find_unique_spp_in_grid(self, spacer='-', spp_sep='\n'): + ''' + This function finds all of the unique species in the grid. + It assumes that your grid data is in the proper format. + + Parameters + ---------- + spacer : str + The character separating the species code from the species count. + Default value is '-' (n-dash) + + spp_sep : str + The character that separates a speces/count combination from + another species/count combination. Default value is '\n' + + ''' + self.unq_spp_lists = [] + for num, data in enumerate(self.grid_data): + spp_names = [] + for col in data.dtype.names: + for row in xrange(self.rows[num]): + if data[col][row].find(spacer) != -1: + nam_lst = data[col][row].split(spacer) + if len(nam_lst) == 2: + spp_names.append(nam_lst[0].strip()) + else: + spp_names.append(nam_lst[0].strip()) + for i in xrange(1, len(nam_lst) - 1): + spp_names.append(nam_lst[i].split(spp_sep)[1].\ + strip()) + self.unq_spp_lists.append(np.unique(np.array(spp_names))) + + def grid_to_dense(self, spacer='-', spp_sep='\n', archival=True): + ''' + This function converts a the list of gridded data sets into dense + data sets and stores them in dense_data. In addition, it + makes a Dense_Data object out of the newly converted data. + + Parameters + ---------- + spacer : str + The character separating the species code from the species count. + Default value is '-' (n-slash) + + spp_sep : str + The character that separates a speces/count combination from + another species/count combination. Default value is '\n' + + + ''' + + self.find_unique_spp_in_grid(spacer=spacer, spp_sep=spp_sep) + dense_data = [] + for i, data in enumerate(self.grid_data): + dtype_list = [('cell', np.int), ('row', np.int), ('column', np.int)] + for name in self.unq_spp_lists[i]: + tuple_type = (name, np.float) + dtype_list.append(tuple_type) + matrix = np.empty(self.rows[i] * self.cols[i], dtype=dtype_list) + #Iterate through the plot + count = 0 + for col in data.dtype.names: + for row in xrange(self.rows[i]): + matrix['cell'][count] = count + matrix['row'][count] = row + matrix['column'][count] = int(col) + for spp_name in self.unq_spp_lists[i]: + + # Check if cell has species. May be nested occurence! 
+ matrix[spp_name][count] = 0 # Set base to 0 + start = data[col][row].find(spp_name) + if start == -1: # Nothing is there + pass # Count already set to zero + + else: # Something is there, but is it nested? + found = start + while found != -1: + # If this is true, it is nested + if (data[col][row][start + len(spp_name)] !=\ + spacer) or not(start == 0 or \ + data[col][row][start - 1] == spp_sep): + + pass + + else: # Actually a species, so add some + # abundance + + raw = data[col][row][start:].split(spacer)[1] + if raw.find(spp_sep) != -1: + tot_spp = raw.split(spp_sep)[0].strip() + else: + tot_spp = raw.split()[0].strip() + matrix[spp_name][count] += float(tot_spp) + found = data[col][row][start + 1 + :].find(spp_name) + start += found + 1 + count += 1 + dense_data.append(matrix) + self.Dense_Object = Dense_Data(dense_data, archival=archival) + + + def output_grid_data(self, filenames): + ''' + This function prints the data within self.grid_data with the given + filenames. + + Parameters + ----------- + filenames : list + A list of filnames to which the data will be saved + + ''' + + assert len(filenames) == len(self.grid_data), "Number of filenames\ + must be the same as the number of datasets" + for i, data in enumerate(self.grid_data): + ff.output_form(data, filenames[i]) + + +class Dense_Data: + '''This class handles data that are in the dense format. An example of the + dense format is a csv file that has columns named 'row' and 'column' and + the remainder of columns named after each species in the plot. The values + within each species column are the counts within the cell specified by the + columns names 'row' and 'column'. + + Note: Need to consider how I might break this class + ''' + + def __init__(self, datalist, delim=',', replace=None, archival=True): + ''' + + Parameters + ----------- + datalist : string, list of strings or list of arrays + List of filenames to be loaded or list of arrays to be set to + self.dense_data + delim : string + The file delimiter + replace : tuple + A tuple of length 2. The first element is a string that + represents the missing values that you would like to replace. The + second element is the value with which you would like to replace + the missing values. + archival : bool + If True, a copy of self.dense_data is made and stored in + self.dense_archival. If dataset is very large, set to False. + + ''' + #TODO: What kind of files could break this + if type(datalist) == str: + datalist = [datalist] + + if np.all(np.array([type(x) == str for x in datalist])): + self.dense_data = [] + if replace != None: + + assert len(replace) == 2, "Replace must contain 2 elements" + + for name in datalist: + self.dense_data.append(replace_vals(name, replace, + delim=delim)) + else: + for name in datalist: + data = csv2rec(name, delimiter=delim) + self.dense_data.append(data) + + elif np.all(np.array([type(x) == np.ndarray for x in datalist])): + self.dense_data = datalist + + if archival: + self.dense_archival = [np.copy(data) for data in + self.dense_data] + else: + self.dense_archival = [] + + def reset_grid_data(self): + ''' + Resets self.grid_data to self.archival_data + + Need to be careful about excessive memory usage! 
+ ''' + + if len(self.dense_archival) == 0: + raise ValueError("The self.dense_archival attribute of this %s" + % (self.__class__.__name__) + " object has not" + + " been initialized") + else: + self.dense_data = [np.copy(data) for data in self.dense_archival] + + + def dense_to_columnar(self, spp_col_num, num_spp, count_col='count',\ + archival=True): + ''' + This function uses a function in form_func to convert dense data into + columnar data. Stores the columnar data as a Columnar Object. + + Parameters + ---------- + spp_col_num : int + The column number in the dense array where the spp_names begin + + num_spp : tuple or int + Number of species in each dataset in self.dense_data. If it is an + int, it will be broadcasted to the length of self.dense_data + + count_col : str + This string specifies the name of the count column. The default is + 'count'. + + ''' + columnar_data = ff.format_dense(self.dense_data, spp_col_num,\ + num_spp, count_col=count_col) + self.Columnar_Object = Columnar_Data(columnar_data, archival=archival) + + def output_dense_data(self, filenames): + ''' + This function prints the data within self.dense_data with the given + filenames. If self.dense_data has not been filled, error is thrown. + + Parameters + ---------- + filenames : list + A list of filenames to which the data will be saved + + ''' + + assert len(filenames) == len(self.dense_data), "Number of filenames\ + must be the same as the number of datasets" + for i, data in enumerate(self.dense_data): + ff.output_form(data, filenames[i]) + +class Transect_Data: + ''' + This class handles data that are similar to the Breeding Bird survey data. + One column has the species ID, one column has stop and all the other + columns have transects. This class can handle data with "n" nestings, not + just two. For example, the data could have location, transect and stop. + + The "stop" data should all be in consecutive columns + + ''' + + def __init__(self, filenames, delim=',', replace=None, archival=True): + ''' + + Parameters + ---------- + filenames : list + A list of filenames + delim : string + The file delimiter + replace : tuple + A tuple of length 2. The first element is a string which + represents the missing values that you would like to replace. The + second element is the value with which you would like to replace + the missing values. + archival : bool + If True, a copy of self.transect_data is made and stored in + self.transect_archival. If dataset is very large, set to False. + + + ''' + self.transect_data = [] + if type(filenames) == str: + filenames = [filenames] + + if replace != None: + + assert len(replace) == 2, "Replace must contain 2 elements" + replace = (str(replace[0]), replace[1]) + + for name in filenames: + self.transect_data.append(replace_vals(name, replace, + delim=delim)) + else: + for name in filenames: + data = csv2rec(name, delimiter=delim) + self.transect_data.append(data) + + if archival: + self.transect_archival = [np.copy(data) for data in + self.transect_data] + else: + self.transect_archival = [] + + def reset_transect_data(self): + ''' + Resets self.transect_data to self.transect_archival + + Need to be careful about excessive memory usage! 
+ ''' + if len(self.transect_archival) == 0: + raise ValueError("The self.transect_archival attribute of this %s" + % (self.__class__.__name__) + " object has not" + + " been initialized") + else: + self.transect_data = [np.copy(data) for data in + self.transect_archival] + + def transect_to_columnar(self, stop_col_num, tot_stops, stop_name='stop', + count_name='count', archival=True): + ''' + This function takes transect data and convertes it into columnar data. + In addition it saves the columnar data as a Columnar_Data object. + + + Parameters + ---------- + stop_col_num : iterable or int + The column number where the stop counts begin (0 is the first + column). Can be len(transect_data) or length == 1. Broadcast if + length equals 1. + + tot_stops : iterable or int + The number of columns with stops. Can be len(transect_data) or + length == 1. Broadcast if length equals 1. + + stop_name : str + The name of the new stop column in the formatted data + + count_name : str + The name of the count column. Default is "count" + + + Notes + ----- + This function assumes that all data in self.transect_data are formatted + the same way. For example, the column that contains species names or + codes has the same name throughout all data sets. + + ''' + # Broadcast stop_col_num + stop_col_num = ff.broadcast(len(self.transect_data), stop_col_num) + tot_stops = ff.broadcast(len(self.transect_data), tot_stops) + + columnar_data = [] + for j, data in enumerate(self.transect_data): + nstops = tot_stops[j] + dtypes = data.dtype.descr[ : stop_col_num[j] ] + if (len(dtypes) + nstops) != len(data.dtype.names): + #Accounting for data fields after stops + end_dtypes = data.dtype.descr[(len(dtypes) + nstops) : ] + for x in end_dtypes: + dtypes.append(x) + dtypes.append((stop_name, 'S20')) + dtypes.append((count_name, np.int)) + column_data = np.empty(len(data) * nstops, dtype=dtypes) + for i in xrange(len(data)): + for name in column_data.dtype.names: + if name is stop_name: + column_data[name][i * nstops:(i + 1) * nstops] = \ + np.arange(0, nstops) + elif name is count_name: + column_data[name][i * nstops:(i + 1) * nstops] = \ + np.array(list(data[i]))[stop_col_num[j] : \ + -len(end_dtypes)] + else: + column_data[name][i * nstops:(i + 1) * nstops] = \ + data[name][i] + # Remove all zeros + column_data = column_data[column_data[count_name] != 0] + columnar_data.append(column_data) + self.Columnar_Object = Columnar_Data(columnar_data, archival=archival) + + def output_transect_data(self, filenames): + ''' + This function prints the data within self.columnar_data with the given + filenames. If self.columnar_data has not been filled, an error is + thrown. + + Parameters + ---------- + filenames : list + A list of filenames to which the data will be saved. Must be the + same length as self.columnar_data + + ''' + + assert len(filenames) == len(self.transect_data), "Number of filenames\ + must be the same as the number of datasets" + for i, data in self.transect_data: + ff.output_form(data, filenames[i]) + + +def remove_char(grid_list, char='\n'): + ''' + Removes the given char from the end of each cell in grid list + ''' + + for grid in grid_list: + for name in grid.dtype.names: + for i in xrange(len(grid[name])): + while grid[name][i][::-1].find(char) == 0: + grid[name][i] = grid[name][i][:-1] + + return grid_list + +def remove_white_spaces(grid_list): + ''' + Removes all of the white spaces from strings. 
+ ''' + for grid in grid_list: + for name in grid.dtype.names: + for i in xrange(len(grid[name])): + grid[name][i] = ''.join(grid[name][i].split(' ')) + + return grid_list + +def replace_vals(filename, replace, delim=','): + ''' + Replace the values in filename with specified values in replace_values + + Parameters + ---------- + filename : string + Will be read into a rec array + + replace_values : tuple + First object is value to replace and second object is what to replace + it with + + + ''' + data = csv2rec(filename, delimiter=delim, missing=replace[0]) + for nm in data.dtype.names: + try: + # Missing float + isNaN = (np.isnan(data[nm])) + except: + isNaN = np.zeros(len(data[nm]), dtype=bool) + isBlank = np.array([it == '' for it in data[nm]]) + isMinusOne = (data[nm] == -1)# Missing int + # Missing other + isNone = np.array([i == None for i in data[nm]]) + ind = np.bitwise_or(isNaN, isBlank) + ind = np.bitwise_or(ind, isMinusOne) + ind = np.bitwise_or(ind, isNone) + data[nm][ind] = replace[1] + return data + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From ac69c78baa3293ddfdc3975d19e804cf94cad11a Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 28 Mar 2014 11:48:46 -0700 Subject: [PATCH 185/343] Reactivate test_empirical, only emp cdf test for now --- .../{xest_empirical.py => test_empirical.py} | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) rename macroeco/empirical/{xest_empirical.py => test_empirical.py} (97%) diff --git a/macroeco/empirical/xest_empirical.py b/macroeco/empirical/test_empirical.py similarity index 97% rename from macroeco/empirical/xest_empirical.py rename to macroeco/empirical/test_empirical.py index 76f171d..630de38 100644 --- a/macroeco/empirical/xest_empirical.py +++ b/macroeco/empirical/test_empirical.py @@ -14,23 +14,26 @@ import numpy.testing as nt class TestEmpiricalCDF(TestCase): - """ Unittests for Empirical cdf """ - def test_empirical_cdf(self): - - #Test against R's ecdf function - - # Test Case 1 + def test_sorted_data(self): test_data = [1, 1, 1, 1, 2, 3, 4, 5, 6, 6] - R_res = [.4, .4, .4, .4, .5, .6, .7, .8, 1, 1] + ans = [.4, .4, .4, .4, .5, .6, .7, .8, 1, 1] res = empirical_cdf(test_data) - assert_array_equal(R_res, res) + assert_array_equal(ans, res['ecdf']) - # Test Case 2 + def test_unsorted_data(self): + test_data = [6, 6, 1, 1, 5, 1, 1, 2, 3, 4] + ans = [.4, .4, .4, .4, .5, .6, .7, .8, 1, 1] + res = empirical_cdf(test_data) + assert_array_equal(ans, res['ecdf']) # Result sorted + assert_array_equal(np.sort(test_data), res['data']) # Data sorted + + def test_all_data_same(self): test_data = [3, 3, 3, 3] - R_res = [1, 1, 1, 1] + ans = [1, 1, 1, 1] res = empirical_cdf(test_data) - assert_array_equal(R_res, res) + assert_array_equal(ans, res['ecdf']) + # class TestPatch(unittest.TestCase): # def setUp(self): From 8092c116e76f57257c9634c8d7979be11c3e5564 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Fri, 28 Mar 2014 15:23:02 -0700 Subject: [PATCH 186/343] Format data takes kwargs and can eval them --- macroeco/misc/format_data.py | 68 ++++++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/macroeco/misc/format_data.py b/macroeco/misc/format_data.py index 0bb66f1..6b3cb43 100644 --- a/macroeco/misc/format_data.py +++ b/macroeco/misc/format_data.py @@ -1,4 +1,3 @@ - import numpy as np import pandas as pd @@ -8,8 +7,7 @@ def format_columnar(): pass -def format_dense(data_path, non_spp_cols, delimiter=",", 
na_values="", - item_col="spp", count_col="count", nan_to_zero=False, drop_na=False): +def format_dense(data_path, non_label_cols, evaluate=False, **kwargs): """ Formats dense data type to columnar data type. @@ -19,14 +17,16 @@ def format_dense(data_path, non_spp_cols, delimiter=",", na_values="", ---------- data_path : str A path to the dense data - non_spp_cols : list - A list of columns in the data that are not species columns + non_label_cols : list + A list of columns in the data that are not label columns + evaluate : bool + If True, eval values in kwargs delimiter : str The delimiter for the dense data. Default, "," - na_values : int, float, str, or list - Values to be labeled as NA. Default, "" + na_values : int, float, str + Value to be labeled as NA. Default, "" item_col : str - Name of the item column in the formatted data. Default, "spp" + Name of the item column in the formatted data. Default, "label" count_col : str Name of the count column in the formatted data. Default, "count" nan_to_zero : bool @@ -36,36 +36,58 @@ def format_dense(data_path, non_spp_cols, delimiter=",", na_values="", Notes ----- - Examples of Dense Data conversion + Examples of Dense Data conversion...TODO """ + kwargs = set_defaults_and_eval(kwargs, evaluate) - # Default arguments - base_data = pd.read_csv(data_path, sep=delimiter, - na_values=na_values) + base_data = pd.read_csv(data_path, sep=kwargs['delimiter'], + na_values=kwargs['na_values']) - # Stack data in columnar form - indexed_data = base_data.set_index(keys=non_spp_cols) - columnar_data = indexed_data.stack() + # Stack data in columnar form. + indexed_data = base_data.set_index(keys=non_label_cols) + columnar_data = indexed_data.stack(dropna=False) columnar_data = columnar_data.reset_index() + # Rename columns + num = len(non_label_cols) + columnar_data.rename(columns={0: kwargs['count_col'], 'level_%i' % num: + kwargs['label_col']}, inplace=True) + # Set nans to zero? - if nan_to_zero: - columnar_data[np.isnan(columnar_data)] = 0 + if kwargs['nan_to_zero']: + ind = np.isnan(columnar_data[kwargs['count_col']]) + columnar_data[kwargs['count_col']][ind] = 0 # Drop nans? 
- if drop_na: + if kwargs['drop_na']: columnar_data = columnar_data.dropna(how="any") - # Rename columns - num = len(non_spp_cols) - columnar_data.rename(columns={0: count_col, 'level_%i' % num: - item_col}, inplace=True) - return columnar_data +def set_defaults_and_eval(kwargs, evaluate): + """ + Sets default values in kwargs if kwargs are not already given + """ + + kwargs['delimiter'] = kwargs.get('delimiter', ',') + kwargs['na_values'] = kwargs.get('na_values', '') + kwargs['nan_to_zero'] = kwargs.get('nan_to_zero', False) + kwargs['drop_na'] = kwargs.get('drop_na', False) + kwargs['label_col'] = kwargs.get('label_col', 'label') + kwargs['count_col'] = kwargs.get('count_col', 'count') + + if evaluate: + for key, val in kwargs.iteritems(): + try: + kwargs[key] = eval(val) + except: + kwargs[key] = val + + return kwargs + def format_transect(): """ """ From 0e3a672073ff18033811631994bb7cdeb9075619 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Fri, 28 Mar 2014 15:23:35 -0700 Subject: [PATCH 187/343] Added dense data formatting for main --- macroeco/main/main.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index fdadd07..1cf1de3 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -5,8 +5,10 @@ import inspect import configparser from twiggy import log +import copy log = log.name('meco') + import numpy as np import pandas as pd @@ -95,9 +97,17 @@ def _do_format(options): """ analysis_name = options['analysis'] + datapath = os.path.normpath(os.path.join(options['param_dir'], + options['data'])) + temp_options = copy.deepcopy(options) if analysis_name == 'format_dense': - misc.format_dense() + + nlc = [nm.strip() for nm in temp_options['non_label_cols'].split(",")] + temp_options.pop('non_label_cols', None) + fdata = misc.format_dense(datapath, nlc, + evaluate=True, **temp_options) + elif analysis_name == 'format_columnar': misc.format_columnar() elif analysis_name == 'format_grid': @@ -105,7 +115,10 @@ def _do_format(options): elif analysis_name == 'format_transect': misc.format_transect() else: - raise NameError, "Cannot format data using analysis %s" % analysis_name + raise NameError("Cannot format data using analysis %s" % analysis_name) + + # Output formatted data + fdata.to_csv(os.path.splitext(datapath)[0] + "_formatted.csv", index=False) def _do_analysis(options): From 9c2496d5b66d66bd0ffcb61bdf54d2ee1d418d1d Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 1 Apr 2014 21:46:29 -0700 Subject: [PATCH 188/343] Fixed _init_ header --- macroeco/misc/__init__.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/macroeco/misc/__init__.py b/macroeco/misc/__init__.py index 257733a..1759d14 100644 --- a/macroeco/misc/__init__.py +++ b/macroeco/misc/__init__.py @@ -1,8 +1,3 @@ -from .misc import * -from .rcparams import * -from format_data import (format_columnar, format_dense, format_grid, - format_transect) -======= """ =============================== Misc (:mod:`macroeco.misc`) @@ -24,5 +19,6 @@ from .misc import (setup_log, _thread_excepthook, log_start_end, inherit_docstring_from, doc_sub) from .rcparams import ggplot_rc +from .format_data import (data_read_write, format_dense) _thread_excepthook() # Make desktop app catch and log sys except from thread From 5f3c06b7d815eb2dba2b6a3fb965fa025a537f38 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 1 Apr 2014 21:46:58 -0700 Subject: [PATCH 189/343] Used data_read_write function in main --- macroeco/main/main.py 
| 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 67fccb1..d5d29fe 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -104,14 +104,12 @@ def _do_format(options): analysis_name = options['analysis'] datapath = os.path.normpath(os.path.join(options['param_dir'], options['data'])) - temp_options = copy.deepcopy(options) + + out_path = os.path.splitext(datapath)[0] + "_formatted.csv" if analysis_name == 'format_dense': - nlc = [nm.strip() for nm in temp_options['non_label_cols'].split(",")] - temp_options.pop('non_label_cols', None) - fdata = misc.format_dense(datapath, nlc, - evaluate=True, **temp_options) + misc.data_read_write(datapath, out_path, "dense", **options) elif analysis_name == 'format_columnar': misc.format_columnar() @@ -122,10 +120,6 @@ def _do_format(options): else: raise NameError("Cannot format data using analysis %s" % analysis_name) - # Output formatted data - fdata.to_csv(os.path.splitext(datapath)[0] + "_formatted.csv", index=False) - - def _do_analysis(options): """ Do analysis for a single run, as specified by options. From 3f7b69181fbfd609153ab84471102995d8016f74 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 1 Apr 2014 21:48:14 -0700 Subject: [PATCH 190/343] Made evaluation mandatory when setting defaults --- macroeco/misc/format_data.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/macroeco/misc/format_data.py b/macroeco/misc/format_data.py index 6b3cb43..7d005bb 100644 --- a/macroeco/misc/format_data.py +++ b/macroeco/misc/format_data.py @@ -79,12 +79,11 @@ def set_defaults_and_eval(kwargs, evaluate): kwargs['label_col'] = kwargs.get('label_col', 'label') kwargs['count_col'] = kwargs.get('count_col', 'count') - if evaluate: - for key, val in kwargs.iteritems(): - try: - kwargs[key] = eval(val) - except: - kwargs[key] = val + for key, val in kwargs.iteritems(): + try: + kwargs[key] = eval(val) + except: + kwargs[key] = val return kwargs From 0d46f357ef2e97f33c1280ec473f2adcffc33f75 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 1 Apr 2014 21:48:42 -0700 Subject: [PATCH 191/343] Added general data_read_write fxn --- macroeco/misc/format_data.py | 65 +++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/macroeco/misc/format_data.py b/macroeco/misc/format_data.py index 7d005bb..53592cd 100644 --- a/macroeco/misc/format_data.py +++ b/macroeco/misc/format_data.py @@ -1,13 +1,76 @@ import numpy as np import pandas as pd +# TODO: docstring inheritance + + +def data_read_write(data_path_in, data_path_out, format_type, **kwargs): + """ + General function to read, format, and write data. + + Parameters + ---------- + data_path_in : str + Path to the file that will be read + data_path_out : str + Path of the file that will be output + format_type : str + Either 'dense', 'grid', 'columnar', or 'transect' + kwargs + Specific keyword args for given data types. See Notes + + + Notes + ----- + + 'Dense Parameters' + + non_label_cols : str + Comma separated list of non label columns. ex. "lat, long, tree" + sep : str + The delimiter for the dense data. Default, "," + na_values : int, float, str + Value to be labeled as NA. 
Default, "" + + See misc.format_dense() for additional keyword parameters + """ + + if format_type == "dense": + + # Set dense defaults + kwargs = _set_dense_defaults_and_eval(kwargs) + + # Try to parse non label columns appropriately + try: + nlc = [nm.strip() for nm in kwargs['non_label_cols'].split(",")] + kwargs.pop('non_label_cols', None) + except KeyError: + raise KeyError("'non_label_cols' is a required keyword dense data") + + # Read data with dense specific keywords + arch_data = pd.read_csv(data_path_in, sep=kwargs['delimiter'], + na_values=kwargs['na_values']) + + form_data = format_dense(arch_data, nlc, **kwargs) + + elif format_type == "grid": + pass + elif format_type == "columnar": + pass + elif format_type == "transect": + pass + else: + raise NameError("%s is not a supported data format" % format_type) + + form_data.to_csv(data_path_out, index=False) + def format_columnar(): """ """ pass -def format_dense(data_path, non_label_cols, evaluate=False, **kwargs): +def format_dense(base_data, non_label_cols, **kwargs): """ Formats dense data type to columnar data type. From 320bb4a7d5ec7f565dfffd1d32bde11b07e5a393 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 1 Apr 2014 21:51:45 -0700 Subject: [PATCH 192/343] Made format_dense take DataFrame and output DataFrame per issue #60 --- macroeco/misc/format_data.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/macroeco/misc/format_data.py b/macroeco/misc/format_data.py index 53592cd..a093789 100644 --- a/macroeco/misc/format_data.py +++ b/macroeco/misc/format_data.py @@ -78,16 +78,10 @@ def format_dense(base_data, non_label_cols, **kwargs): Parameters ---------- - data_path : str - A path to the dense data + data: DataFrame + The dense data non_label_cols : list A list of columns in the data that are not label columns - evaluate : bool - If True, eval values in kwargs - delimiter : str - The delimiter for the dense data. Default, "," - na_values : int, float, str - Value to be labeled as NA. Default, "" item_col : str Name of the item column in the formatted data. Default, "label" count_col : str @@ -103,10 +97,6 @@ def format_dense(base_data, non_label_cols, **kwargs): """ - kwargs = set_defaults_and_eval(kwargs, evaluate) - - base_data = pd.read_csv(data_path, sep=kwargs['delimiter'], - na_values=kwargs['na_values']) # Stack data in columnar form. indexed_data = base_data.set_index(keys=non_label_cols) @@ -130,9 +120,22 @@ def format_dense(base_data, non_label_cols, **kwargs): return columnar_data -def set_defaults_and_eval(kwargs, evaluate): +def _set_dense_defaults_and_eval(kwargs): """ - Sets default values in kwargs if kwargs are not already given + Sets default values in kwargs if kwargs are not already given. 
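An aside on the evaluation step in this helper: keyword values arriving from a parameter file are plain strings, and the loop evals them back into Python objects, falling back to the raw string when eval fails. A minimal sketch of that behavior (the example kwargs are invented for illustration):

raw_kwargs = {'drop_na': 'False', 'na_values': "''", 'label_col': 'label'}
clean_kwargs = {}
for key, val in raw_kwargs.items():
    try:
        clean_kwargs[key] = eval(val)   # 'False' -> False, "''" -> ''
    except Exception:
        clean_kwargs[key] = val         # e.g. 'label' stays a plain string
# clean_kwargs -> {'drop_na': False, 'na_values': '', 'label_col': 'label'}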
+ + Evaluates all results incase some arguments are given as string + + Parameters + ----------- + kwargs : dict + Dictionary of dense specific keyword args + + Returns + ------- + : dict + Default, evaluated dictionary + """ kwargs['delimiter'] = kwargs.get('delimiter', ',') From 1ffb22afeaebe3ca3d8be3b74e3ff80a0b8dbbbe Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 2 Apr 2014 11:07:41 -0700 Subject: [PATCH 193/343] Always use count_col, clean up docstrings/names --- macroeco/empirical/empirical.py | 40 +++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index b90639e..1010b0b 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -1,6 +1,7 @@ from __future__ import division import os import re +import copy from configparser import ConfigParser import itertools from copy import deepcopy @@ -249,7 +250,7 @@ def _subset_table(full_table, subset): def _subset_meta(full_meta, subset): """ - Return subtable matching all conditions in subset. + Return subset of metadata matching all conditions in subset. Parameters ---------- @@ -269,6 +270,7 @@ def _subset_meta(full_meta, subset): conditions = subset.replace(' ','').split(';') + # TODO: This works for numeric, modify to do nothing for categorical cols for condition in conditions: condition_list = re.split('[<>=]', condition) col = condition_list[0] @@ -310,7 +312,7 @@ def sad(patch, cols, splits='', clean=True): Returns ------- {1} Result has two columns: spp (species identifier) and y (individuals of - that species). + that species). Notes ----- @@ -321,20 +323,19 @@ def sad(patch, cols, splits='', clean=True): """ spp_col, count_col = _get_cols(['spp_col', 'count_col'], cols, patch) + count_col, patch = _fallback_count_col(count_col, patch) + full_spp_list = np.unique(patch.table[spp_col]) # Loop through each split result_list = [] - for substring, subtable in _yield_subtables(patch, splits): + for substring, subpatch in _yield_subpatches(patch, splits): # Get abundance for each species sad_list = [] for spp in full_spp_list: - this_spp = (subtable[spp_col] == spp) - if count_col: - count = np.sum(subtable[count_col][this_spp]) - else: - count = np.sum(this_spp) + this_spp = (subpatch.table[spp_col] == spp) + count = np.sum(subpatch.table[count_col][this_spp]) sad_list.append(count) # Create dataframe of spp names and abundances @@ -350,6 +351,18 @@ def sad(patch, cols, splits='', clean=True): # Return all results return result_list +def _fallback_count_col(count_col, patch): + """ + Determine if count_col is None (not included in cols string). If None, add + a column named count_col to the patch table so it can be used in further + analysis. + """ + if count_col: + return count_col, patch + else: + count_col = 'count_col' + patch.table['count_col'] = np.ones(len(patch.table)) + return count_col, patch @log_start_end @doc_sub(metric_params, metric_return, cols_note, splits_note) @@ -364,7 +377,7 @@ def ssad(patch, cols, splits=''): Returns ------- {1} Result has one column giving the individuals of species in each - subplot. + subplot. 
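A quick illustration of the count-column fallback described above: when cols names no count column, a column of ones is added so that each row contributes one individual to the abundance sums. Sketch with an invented table:

import numpy as np
import pandas as pd

# Toy census table with no count column: every row is a single individual
table = pd.DataFrame({'spp': ['a', 'a', 'b', 'c']})
if 'count' not in table.columns:
    table['count'] = np.ones(len(table))
sad = table.groupby('spp')['count'].sum()   # a: 2, b: 1, c: 1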
Notes ----- @@ -1200,7 +1213,7 @@ def _get_cols(special_col_names, cols, patch): @doc_sub(splits_note) -def _yield_subtables(patch, splits): +def _yield_subpatches(patch, splits): """ Iterator for subtables defined by a splits string @@ -1226,9 +1239,12 @@ def _yield_subtables(patch, splits): subset_list = _parse_splits(patch, splits) for subset in subset_list: log.info('Analyzing subset: %s' % subset) - yield subset, _subset_table(patch.table, subset) + subpatch = copy.copy(patch) + subpatch.table = _subset_table(patch.table, subset) + subpatch.meta = _subset_meta(patch.meta, subset) + yield subset, subpatch else: - yield '', patch.table + yield '', patch @doc_sub(splits_note) From 1ad6b2439d54dc7cdd21d3ab59cdb186b04f17fb Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 2 Apr 2014 11:07:55 -0700 Subject: [PATCH 194/343] Add few todo notes --- macroeco/main/main.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 97cbad4..5c57bb7 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -468,7 +468,9 @@ def _write_comparison_plots_tables(spid, models, options, core_results, - cdf vs emp cdf - rad vs rad """ - # TODO: More general function for RAD that deals with -0.5/len issue + # TODO: Implement more general function for RAD (model.rank) + # TODO: Write only RAD, remove rest + # TODO: Add second plot to RAD showing residual core_result = core_results[spid][1] n_vals = len(core_result) From 78ff6d0ad59ea6efff3bc4a3eeb474d03f41d55b Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 2 Apr 2014 14:19:23 -0700 Subject: [PATCH 195/343] Add parameter file editor to GUI, closes #63 --- desktop.py | 120 ++++++++++++++++++++++++++++-------------- macroeco/main/main.py | 4 ++ 2 files changed, 85 insertions(+), 39 deletions(-) diff --git a/desktop.py b/desktop.py index 53aced3..1692d14 100755 --- a/desktop.py +++ b/desktop.py @@ -30,8 +30,8 @@ class MainWindow(wx.Frame): def __init__(self, parent, title): wx.Frame.__init__(self, parent, title=title) self.t = None + self.filename = '' self.dirname = '' - self.parampath = '' self.InitUI() self.Show(True) @@ -43,46 +43,49 @@ def InitUI(self): head_font = wx.Font(18, wx.SWISS, wx.NORMAL, wx.BOLD) heading = wx.StaticText(self, label='Macroeco Desktop') sizerhead.Add(heading, 0, wx.EXPAND) - #heading.SetFont(head_font) + heading.SetFont(head_font) # Step 1 - sizer1 = wx.BoxSizer(wx.VERTICAL) - sizer1a = wx.BoxSizer(wx.HORIZONTAL) - sizer1b = wx.BoxSizer(wx.HORIZONTAL) - - choose_text = wx.StaticText(self, - label='1. Choose a parameters file'+' '*20) - - choose_button = wx.Button(self, label='Open') - self.Bind(wx.EVT_BUTTON, self.OnOpen, choose_button) - - # Make attribute so easily modified by other methods - self.choose_msg = wx.StaticText(self, - label='') - #self.choose_msg.SetFont(wx.Font(11, wx.SWISS, wx.SLANT, wx.NORMAL)) - - sizer1a.Add(choose_text, 1, wx.EXPAND) - sizer1a.Add(choose_button, 0, wx.EXPAND) - sizer1b.Add(self.choose_msg, 1, wx.EXPAND) - - sizer1.Add(sizer1a, 0, wx.EXPAND) - sizer1.Add(sizer1b, 0, wx.EXPAND) + sizer1 = wx.BoxSizer(wx.HORIZONTAL) + + param_text = wx.StaticText(self, + label=("1. 
Open or create a parameter file\n" + " File can be edited below and saved")) + self.open_button = wx.Button(self, label='Open') + self.new_button = wx.Button(self, label='New') + self.save_button = wx.Button(self, label='Save') + self.save_button.Enable(False) + + sizer1.Add(param_text, 1, wx.EXPAND) + sizer1.Add(self.open_button, 0, wx.EXPAND | wx.RIGHT, 6) + sizer1.Add(self.new_button, 0, wx.EXPAND | wx.RIGHT, 6) + sizer1.Add(self.save_button, 0, wx.EXPAND) + + # Bind open and new buttons + self.Bind(wx.EVT_BUTTON, self.OnOpen, self.open_button) + self.Bind(wx.EVT_BUTTON, self.OnNew, self.new_button) + self.Bind(wx.EVT_BUTTON, self.OnSave, self.save_button) + + # Param window + sizerpfile = wx.BoxSizer(wx.HORIZONTAL) + self.pfile = wx.TextCtrl(self, wx.ID_ANY, size=(600,300), + style=wx.TE_MULTILINE|wx.HSCROLL) + sizerpfile.Add(self.pfile, 1, wx.EXPAND) # Step 2 sizer2 = wx.BoxSizer(wx.HORIZONTAL) - run_text = wx.StaticText(self, - label='2. Run analysis') + run_text = wx.StaticText(self, label='2. Run analysis') self.run_button = wx.Button(self, label='Run') sizer2.Add(run_text, 1, wx.EXPAND) sizer2.Add(self.run_button, 0, wx.EXPAND) - # Updating process + # Bind run button self.Bind(wx.EVT_BUTTON, self.OnRun, self.run_button) # Output window sizerlogbox = wx.BoxSizer(wx.HORIZONTAL) - self.logbox = wx.TextCtrl(self, wx.ID_ANY, size=(500,400), - style = wx.TE_MULTILINE|wx.TE_READONLY|wx.HSCROLL) + self.logbox = wx.TextCtrl(self, wx.ID_ANY, size=(600,150), + style=wx.TE_MULTILINE|wx.TE_READONLY|wx.HSCROLL) sizerlogbox.Add(self.logbox, 1, wx.EXPAND) # Redirect text here @@ -94,9 +97,12 @@ def InitUI(self): self.Bind(wx.EVT_IDLE, self.OnIdle) # All items + sizer_main = wx.BoxSizer(wx.VERTICAL) sizer_main.Add(sizerhead, 0, wx.EXPAND | wx.ALL, 12) + sizer_main.Add(sizer1, 0, wx.EXPAND | wx.ALL, 12) + sizer_main.Add(sizerpfile, 0, wx.EXPAND | wx.ALL, 12) sizer_main.Add(sizer2, 0, wx.EXPAND | wx.ALL, 12) sizer_main.Add(sizerlogbox, 0, wx.EXPAND | wx.ALL, 12) @@ -105,17 +111,52 @@ def InitUI(self): self.SetAutoLayout(True) sizer_main.Fit(self) + def defaultFileDialogOptions(self): + ''' Return a dictionary with file dialog options that can be + used in both the save file dialog as well as in the open + file dialog. 
''' + return dict(message='Choose a file', defaultDir=self.dirname, + wildcard='*.*') + + def askUserForFilename(self, **dialogOptions): + dialog = wx.FileDialog(self, **dialogOptions) + if dialog.ShowModal() == wx.ID_OK: + userProvidedFilename = True + self.filename = dialog.GetFilename() + self.dirname = dialog.GetDirectory() + else: + userProvidedFilename = False + dialog.Destroy() + return userProvidedFilename + def OnOpen(self,e): - self.filename = '' - self.dirname = '' - dlg = wx.FileDialog(self, 'Choose a parameters file', self.dirname, - '', '*.*', wx.OPEN) - if dlg.ShowModal() == wx.ID_OK: - self.filename = dlg.GetFilename() - self.dirname = dlg.GetDirectory() - self.choose_msg.SetLabel(' Parameters file selected') - self.parampath = os.path.join(self.dirname, self.filename) - dlg.Destroy() + if self.askUserForFilename(style=wx.OPEN, + **self.defaultFileDialogOptions()): + parampath = os.path.join(self.dirname, self.filename) + f = open(parampath, 'r') + self.pfile.SetValue(f.read()) + f.close() + + self.save_button.Enable(True) + + self.logbox.SetValue('') + print "File opened at " + os.path.join(self.dirname, self.filename) + + def OnNew(self,e): + if self.askUserForFilename(style=wx.SAVE, + **self.defaultFileDialogOptions()): + self.OnSave(e, new_file=True) + + def OnSave(self, event, new_file=False): + f = open(os.path.join(self.dirname, self.filename), 'w') + f.write(self.pfile.GetValue()) + f.close() + + self.logbox.SetValue('') + if new_file: + print "File created at "+os.path.join(self.dirname, self.filename) + else: + print "File saved at " + os.path.join(self.dirname, self.filename) def OnRun(self,e): self.logbox.SetValue('') @@ -123,7 +164,8 @@ def OnRun(self,e): def RunMain(self): self.run_button.Enable(False) # Turn the run button off - self.t = thread.Thread(target=main, args=(self.parampath,)) + parampath = os.path.join(self.dirname, self.filename) + self.t = thread.Thread(target=main, args=(parampath,)) self.t.daemon = True # Kills thread if app exits self.t.start() diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 5c57bb7..2c7b98a 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -81,6 +81,10 @@ def _get_params_base_options(param_path): run_names = params.sections() + # Check there's at least one run + if not run_names: + raise NameError, "Parameters file must contain at least one run" + # Create options dict base_options = {} base_options['param_dir'] = param_dir From 649cfc048a29b926b14a69e4e9d9e3d2797c8e60 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 2 Apr 2014 20:16:02 -0700 Subject: [PATCH 196/343] Add SAR, EAR, comm_grid --- macroeco/empirical/__init__.py | 5 +- macroeco/empirical/empirical.py | 454 +++++++++++++++++--------------- 2 files changed, 251 insertions(+), 208 deletions(-) diff --git a/macroeco/empirical/__init__.py b/macroeco/empirical/__init__.py index a1991b8..ba0ba04 100644 --- a/macroeco/empirical/__init__.py +++ b/macroeco/empirical/__init__.py @@ -24,9 +24,12 @@ sad ssad + sar + ear + comm_grid """ from .empirical import (Patch, - sad, ssad, + sad, ssad, sar, ear, comm_grid, empirical_cdf) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index 1010b0b..932ee66 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -67,6 +67,14 @@ each into two equally sized divisions) within each of the three years, for a total of 12 separate analyses.""" +division_note = \ + """The parameter divisions describes how to successively divide the patch + 
along the x_col and y_col dimensions. For + example, the string '1,2; 2,2; 2,4' will produce an output table with three + rows, giving the result across two subplots when the patch is split + along y_col, across four subplots when the patch is split into a 2x2 grid, + and across eight subplots when the patch is split into 2 parts along x_col + and 4 parts along y_col.""" class Patch(object): """ @@ -297,7 +305,7 @@ def _subset_meta(full_meta, subset): @log_start_end @doc_sub(metric_params, metric_return, cols_note, splits_note) -def sad(patch, cols, splits='', clean=True): +def sad(patch, cols, splits, clean=True): """ Calculates an empirical species abundance distribution @@ -322,8 +330,8 @@ def sad(patch, cols, splits='', clean=True): """ - spp_col, count_col = _get_cols(['spp_col', 'count_col'], cols, patch) - count_col, patch = _fallback_count_col(count_col, patch) + (spp_col, count_col), patch = \ + _get_cols(['spp_col', 'count_col'], cols, patch) full_spp_list = np.unique(patch.table[spp_col]) @@ -351,22 +359,10 @@ def sad(patch, cols, splits='', clean=True): # Return all results return result_list -def _fallback_count_col(count_col, patch): - """ - Determine if count_col is None (not included in cols string). If None, add - a column named count_col to the patch table so it can be used in further - analysis. - """ - if count_col: - return count_col, patch - else: - count_col = 'count_col' - patch.table['count_col'] = np.ones(len(patch.table)) - return count_col, patch @log_start_end @doc_sub(metric_params, metric_return, cols_note, splits_note) -def ssad(patch, cols, splits=''): +def ssad(patch, cols, splits): """ Calculates an empirical intra-specific spatial abundance distribution @@ -408,176 +404,260 @@ def ssad(patch, cols, splits=''): return result_list -def sar(self, div_cols, div_list, criteria, form='sar', output_N=False): - ''' - Calculate an empirical species-area relationship given criteria. +@log_start_end +@doc_sub(metric_params, metric_return, cols_note, splits_note, division_note) +def sar(patch, cols, splits, divs): + """ + Calculates an empirical species area relationship Parameters ---------- - div_cols : tuple - Column names to divide, eg, ('x', 'y'). Must be metric. - div_list : list of tuples - List of division pairs in same order as div_cols, eg, [(2,2), - (2,4), (4,4)]. Values are number of divisions of div_col. - criteria : dict - See docstring for EPatch.sad. Here, criteria SHOULD NOT include - items referring to div_cols (if there are any, they are ignored). - form : string - 'sar' or 'ear' for species or endemics area relationship. EAR is - relative to the subtable selected after criteria is applied. - output_N : bool - Adds the column N to the output rec array which contains the - average N for a given area. + {0} + divs : str + Description of how to divide x_col and y_col. See notes. + full_output : bool + If True, tuples in result contain a third element with a row for each + area (as in the main result) and columns containing richness for all + suplots at that division. Returns ------- - rec_sar: structured array - Returns a structured array with fields 'items' and 'area' that - contains the average items/species for each given area specified by - critieria. - full_result : list of ndarrays - List of same length as areas containing arrays with element for - count of species or endemics in each subpatch at corresponding - area. 
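A worked example of the divisions string may help here: each semicolon-separated entry gives the number of slices along x_col and y_col, and the fractional area of a cell is one over their product. Sketch (parsing simplified relative to the library code):

divs = '1,2; 2,2; 2,4'
for div in divs.split(';'):
    nx, ny = [int(d) for d in div.split(',')]
    area_fraction = 1.0 / (nx * ny)
    print("%s -> %s" % (div.strip(), area_fraction))
# 1,2 -> 0.5    2,2 -> 0.25    2,4 -> 0.125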
- ''' + {1} Result has three columns, div, x, and y, that give the ID for the + division given as an argument, fractional area, and the mean species + richness at that division. - # If any element in div_cols in criteria, remove from criteria - criteria = {k: v for k, v in criteria.items() if k not in div_cols} + Notes + ----- + {2} - # Loop through div combinations (ie, areas), calc sad, and summarize - areas = [] - mean_result = [] - full_result = [] - N_result = [] + For the SAR, cols must also contain x_col and y_col, giving the x and y + dimensions along which to grid the patch. - for div in div_list: + {3} - # Add divs to criteria dict - this_criteria = deepcopy(criteria) - for i, col in enumerate(div_cols): - this_criteria[col] = div[i] + {4} - # Get flattened sad for all criteria and this div - sad_return = self.sad(this_criteria) + """ - if output_N: - N_result.append(np.mean([sum(sad[1]) for sad in sad_return])) + (spp_col, x_col, y_col), patch = \ + _get_cols(['spp_col', 'x_col', 'y_col'], cols, patch) - flat_sad = flatten_sad(sad_return)[1] + # Loop through each split + result_list = [] + for substring, subpatch in _yield_subpatches(patch, splits): - # Store results - if form == 'sar': - this_full = np.sum((flat_sad > 0), axis=0) - this_mean = np.mean(this_full) - elif form == 'ear': - totcnt = np.sum(flat_sad, axis=1) - totcnt_arr = \ - np.array([list(totcnt),]*np.shape(flat_sad)[1]).transpose() + # Loop through all divisions within this split + subresultx = [] + subresulty = [] + subdivlist = divs.split(';') + for divs in subdivlist: + spatial_table = _yield_spatial_table(subpatch, divs, spp_col, + x_col, y_col) + subresulty.append(np.mean(spatial_table['spp_count'])) # n spp + subresultx.append(1 / eval(divs.replace(',', '*'))) # a frac - this_full = np.sum(np.equal(flat_sad, totcnt_arr), axis=0) - this_mean = np.mean(this_full) - else: - raise NotImplementedError('No SAR of form %s available' % form) + # Append subset result + subresult = pd.DataFrame({'div': subdivlist, 'x': subresultx, + 'y': subresulty}) + result_list.append((substring, subresult)) - full_result.append(this_full) - mean_result.append(this_mean) + # Return all results + return result_list - # Store area - area = 1 - for i, col in enumerate(div_cols): - dmin = self.data_table.meta[(col, 'minimum')] - dmax = self.data_table.meta[(col, 'maximum')] - dprec = self.data_table.meta[(col, 'precision')] - length = (dmax + dprec - dmin) - area *= length / div[i] +@log_start_end +@doc_sub(metric_params, metric_return, cols_note, splits_note, division_note) +def ear(patch, cols, splits, divisions): + """ + Calculates an empirical endemics area relationship - areas.append(area) + Parameters + ---------- + {0} + divisions : str + Description of how to divide x_col and y_col. See notes. - # Return - if not output_N: - rec_sar = np.array(zip(mean_result, areas), dtype=[('items', - np.float), ('area', np.float)]) - else: - rec_sar = np.array(zip(mean_result, N_result, areas), - dtype=[('items', np.float), ('N', np.float), ('area', np.float)]) + Returns + ------- + {1} Result has three columns, div, x, and y, that give the ID for the + division given as an argument, fractional area, and the mean number of + endemics per cell at that division. - return rec_sar, full_result + Notes + ----- + {2} + For the EAR, cols must also contain x_col and y_col, giving the x and y + dimensions along which to grid the patch. 
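To make the endemics count concrete: at a given division, a species is endemic if it occurs in exactly one cell, and the reported value is the number of endemics divided by the number of cells. Sketch with invented species sets:

import numpy as np

# Hypothetical species sets for the four cells of a 2,2 division
spp_sets = [{'a', 'b'}, {'a'}, {'b', 'c'}, {'c', 'd'}]
all_spp = set.union(*spp_sets)
n_endemics = sum(np.sum([sp in s for s in spp_sets]) == 1 for sp in all_spp)
mean_endemics_per_cell = n_endemics / 4.0    # only 'd' is endemic -> 0.25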
-def universal_sar(self, div_cols, div_list, criteria, include_full=False): - ''' - Calculates the empirical universal sar given criteria. The universal - sar calculates the slope of the SAR and the ratio of N / S at all - the areas in div_cols (where N is the total number of species and S is - the total number of species). + {3} + + {4} + + """ + + (spp_col, x_col, y_col), patch = \ + _get_cols(['spp_col', 'x_col', 'y_col'], cols, patch) + + # Loop through each split + result_list = [] + for substring, subpatch in _yield_subpatches(patch, splits): + + all_spp = np.unique(subpatch.table[spp_col]) + + # Loop through all divisions within this split + subresultx = [] + subresulty = [] + subdivlist = divs.split(';') + for divs in subdivlist: + spatial_table = _yield_spatial_table(subpatch, divs, spp_col, + x_col, y_col) - This function assumes that the div_list contains halvings. If they are - not, the function will still work but the results will be meaningless. - An example a of div_list with halvings is: + endemic_counter = 0 + for spp in all_spp: + spp_in_cell = [spp in x for x in spatial_table['spp_set']] + spp_n_cells = np.sum(spp_in_cell) + if spp_n_cells == 1: # If a spp is in only 1 cell, endemic + endemic_counter += 1 - [(1,1), (1,2), (2,2), (2,4), (4,4)] + n_cells = len(spatial_table) + subresulty.append(endemic_counter / n_cells) # mean endemics / cell + subresultx.append(1 / eval(divs.replace(',', '*'))) # a frac + + # Append subset result + subresult = pd.DataFrame({'div': subdivlist, 'x': subresultx, + 'y': subresulty}) + result_list.append((substring, subresult)) + + # Return all results + return result_list + +@log_start_end +@doc_sub(metric_params, metric_return, cols_note, splits_note) +def comm_grid(patch, cols, splits, divs, metric='Sorensen'): + """ + Calculates commonality as a function of distance for a gridded patch Parameters ---------- - div_cols : tuple - Column names to divide, eg, ('x', 'y'). Must be metric. - div_list : list of tuples - List of division pairs in same order as div_cols, eg, [(2,2), - (2,4), (4,4)]. Values are number of divisions of div_col. - criteria : dict - See docstring for EPatch.sad. Here, criteria SHOULD NOT include - items referring to div_cols (if there are any, they are ignored). - include_full : bool - If include_full = True, the division (1,1) will be included if it - was now already included. Else it will not be included. (1,1) is - equivalent to the full plot - + {0} + divs : str + Description of how to divide x_col and y_col. Unlike SAR and EAR, only + one division can be given at a time. See notes. + metric : str + One of Sorensen or Jaccard, giving the metric to use for commonality + calculation Returns ------- - z_array : a structured array - Has the columns names: - 'z' : slope of the SAR at the given area - 'S' : Number of species at the given division - 'N' : Number of individuals at the given division - 'N/S' : The ratio of N/S at the given division + {1} Result has three columns, pair, x, and y, that give the locations of + the pair of patches for which commonality is calculated, the distance + between those cells, and the Sorensen or Jaccard result. + + Notes + ----- + {2} + + For gridded commonality, cols must also contain x_col and y_col, giving the + x and y dimensions along which to grid the patch. 
+ + {3} + + """ + + (spp_col, x_col, y_col), patch = \ + _get_cols(['spp_col', 'x_col', 'y_col'], cols, patch) + + # Loop through each split + result_list = [] + for substring, subpatch in _yield_subpatches(patch, splits): + + # Get spatial table and break out columns + spatial_table = _yield_spatial_table(subpatch, divs, spp_col, + x_col, y_col) + spp_set = spatial_table['spp_set'] + cell_loc = spatial_table['cell_loc'] + spp_count = spatial_table['spp_count'] + + # Get all possible pairwise combinations of cells + pair_list = [] + dist_list = [] + comm_list = [] + for i in range(len(spatial_table)): + for j in range(i+1, len(spatial_table)): + + pair_list.append(str(cell_loc[i]) + '-' + str(cell_loc[j])) + + dist_list.append(_distance(cell_loc[i], cell_loc[j])) + + ij_intersect = spp_set[i] & spp_set[j] + if metric.lower() == 'sorensen': + comm = 2*len(ij_intersect) / (spp_count[i]+spp_count[j]) + elif metric.lower() == 'jaccard': + comm = len(ij_intersect) / len(spp_set[i] | spp_set[j]) + else: + raise ValueError, ("Only Sorensen and Jaccard metrics are " + "available for gridded commonality") + comm_list.append(comm) + # Append subset result + subresult = pd.DataFrame({'pair': pair_list, 'x': dist_list, + 'y': comm_list}) + result_list.append((substring, subresult)) + + # Return all results + return result_list + + +def _yield_spatial_table(patch, div, spp_col, x_col, y_col): + """ + Calculates an empirical spatial table + + Yields + ------- + DataFrame + Spatial table for each division. See Notes. Notes ----- - If you give it n divisions in div_list you will get a structured array - back that has length n - 2. Therefore, if you only have one - ''' + The spatial table is the precursor to the SAR, EAR, and grid-based + commonality metrics. Each row in the table corresponds to a cell created by + a given division. Columns are cell_loc (within the grid defined by the + division), spp_count, and spp_set. 
- # If (1,1) is not included, include it - if include_full: - try: - div_list.index((1,1)) - except ValueError: - div_list.insert(0, (1,1)) + """ - # Run sar with the div_cols - sar = self.sar(div_cols, div_list, criteria, output_N=True)[0] + div_split_list = div.replace(';','').split(',') + div_split = (x_col + ':' + div_split_list[0] + ';' + + y_col + ':' + div_split_list[1]) - # sort by area - sar = np.sort(sar, order=['area'])[::-1] + # Get cell_locs + # Requires _parse_splits and _product functions to go through x then y + x_starts, x_ends = _col_starts_ends(patch, x_col, div_split_list[0]) + x_offset = (x_ends[0] - x_starts[0]) / 2 + x_locs = x_starts + x_offset - # Calculate z's - if len(sar) >= 3: # Check the length of sar - z_list = [z(sar['items'][i - 1], sar['items'][i + 1]) for i in - np.arange(1, len(sar)) if sar['items'][i] != sar['items'][-1]] - else: - return np.empty(0, dtype=[('z', np.float), ('S', np.float), ('N', - np.float), ('N/S', np.float)]) + y_starts, y_ends = _col_starts_ends(patch, y_col, div_split_list[1]) + y_offset = (y_ends[0] - y_starts[0]) / 2 + y_locs = y_starts + y_offset + + cell_locs = _product(x_locs, y_locs) + + # Get spp set and count for all cells + spp_count_list = [] # Number of species in cell + spp_set_list = [] # Set object giving unique species IDs in cell + for cellstring, cellpatch in _yield_subpatches(patch, div_split): + spp_set = set(np.unique(cellpatch.table[spp_col])) + spp_set_list.append(spp_set) + spp_count_list.append(len(spp_set)) + + # Create and return dataframe + df = pd.DataFrame({'cell_loc': cell_locs, 'spp_count': spp_count_list, + 'spp_set': spp_set_list}) + return df - N_over_S = sar['N'][1:len(sar) - 1] / sar['items'][1:len(sar) - 1] - z_array = np.array(zip(z_list, sar['items'][1:len(sar) - 1], - sar['N'][1:len(sar) - 1], N_over_S), dtype=[('z', np.float), ('S', - np.float), ('N', np.float), ('N/S', np.float)]) - return z_array def comm_sep(self, plot_locs, criteria, loc_unit=None): ''' @@ -1104,24 +1184,8 @@ def tsed(self, criteria, normalize=True, exponent=0.75): return result -def flatten_sad(sad): - ''' - Takes a list of tuples, like sad output, ignores keys, and converts values - into a 2D array with each value as a column (ie, species in rows, samples - in columns. - ''' - - combs = [cmb[0] for cmb in sad] - result = np.zeros((len(sad[0][1]), len(sad))) - - for i, tup in enumerate(sad): - result[:,i] = tup[1] - - return combs, result - - -def distance(pt1, pt2): - ''' Calculate Euclidean distance between two points ''' +def _distance(pt1, pt2): + """Euclidean distance between two points""" return np.sqrt((pt1[0] - pt2[0]) ** 2 + (pt1[1] - pt2[1]) ** 2) @@ -1146,41 +1210,6 @@ def decdeg_distance(pt1, pt2): return km -def divisible(dividend, precision, divisor, tol = 1e-9): - ''' - Check if dividend (here width or height of patch) is evenly divisible by - divisor (here a number of patch divs) while accounting for floating point - rounding issues. - ''' - if divisor == 0: - return False - if divisor > round(dividend / precision): - return False - - quot_raw = (dividend / precision) / divisor - quot_round = round(quot_raw) - diff = abs(quot_raw - quot_round) - - if diff < tol: - return True - else: - return False - - -def rnd(num): - ''' - Round num to number of decimal places in precision. Used to avoid issues - with floating points in the patch and subpatch width and height that make - subpatches not lie exactly on even divisions of patch. 
- ''' - return round(num, 6) - -def z(doubleS, halfS): - '''Calculates the z for a double S value and a half S value''' - - return np.log(doubleS / halfS) / (2 * np.log(2)) - - def _get_cols(special_col_names, cols, patch): """ @@ -1207,10 +1236,16 @@ def _get_cols(special_col_names, cols, patch): # Get special_col_names from dict result = [] for special_col_name in special_col_names: - result.append(col_dict.get(special_col_name, None)) + col_name = col_dict.get(special_col_name, None) - return tuple(result) + # Create a count col if its requested and doesn't exist + if special_col_name is 'count_col' and col_name is None: + col_name = 'count' + patch.table['count'] = np.ones(len(patch.table)) + result.append(col_name) + + return tuple(result), patch @doc_sub(splits_note) def _yield_subpatches(patch, splits): @@ -1280,12 +1315,7 @@ def _parse_splits(patch, splits): level_list = [col + '==' + str(x) + ';' for x in np.unique(patch.table[col])] else: - col_step = eval(patch.meta[col]['step']) # eval converts to float - col_min = eval(patch.meta[col]['min']) - col_max = eval(patch.meta[col]['max']) - step = (col_max - col_min + col_step) / eval(val) - starts = np.arange(col_min, col_max + col_step, step) - ends = starts + step + starts, ends = _col_starts_ends(patch, col, val) level_list = [col + '>=' + str(x) + '; ' + col + '<' + str(y)+'; ' for x, y in zip(starts, ends)] @@ -1295,6 +1325,18 @@ def _parse_splits(patch, splits): return [''.join(x)[:-2] for x in _product(*subset_list)] +def _col_starts_ends(patch, col, slices): + + col_step = eval(patch.meta[col]['step']) # eval converts to float + col_min = eval(patch.meta[col]['min']) + col_max = eval(patch.meta[col]['max']) + step = (col_max - col_min + col_step) / eval(slices) + starts = np.arange(col_min, col_max + col_step, step) + ends = starts + step + + return starts, ends + + def _product(*args, **kwds): """ Generates cartesian product of lists given as arguments @@ -1309,8 +1351,6 @@ def _product(*args, **kwds): return result - - def empirical_cdf(data): """ Generates an empirical cdf from data. From 6a8b482e4265b19e8b7240bde551bc66001db5a8 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 2 Apr 2014 22:22:25 -0700 Subject: [PATCH 197/343] cnbinom fit converts input to array --- macroeco/models/_distributions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index fd40b4a..09f842d 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -503,6 +503,8 @@ def translate_args(self, mu, k_agg, b): @inherit_docstring_from(rv_discrete_meco) def fit_mle(self, data, b=None, k_range=(0.1, 100, 0.1)): + + data = np.array(data) mu = np.mean(data) if not b: From 90e1403e3c6deea66b731d5b8c59359df73b62a0 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 2 Apr 2014 22:23:21 -0700 Subject: [PATCH 198/343] preliminary unittesting of cnbinom. 
Issue #3 closed --- macroeco/models/test_distributions.py | 95 +++++++++++++++++---------- 1 file changed, 60 insertions(+), 35 deletions(-) diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index 5eb27de..125c640 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -146,43 +146,68 @@ def test_fit_mle_with_manual_calc(self): assert_array_almost_equal([mu, k], [9, 8.54], decimal=2) class TestCnbinom(TestCase): - pass - - # def test_zillio_plots(self): - # """ Test the cnbinom function replicated the Zillio and He plots """ - - # # Define Preliminary - # a = np.array([0.1, .3, .8]) - # k = np.array([.1, 1, 10]) - # fnbd_vec = [] - # nbd_vec = [] - # binm_vec = [] - # descrip = [] - - # # Get data - # for ta in a: - # for tk in k: - # fnbd_vec.append(cnbinom.pmf(np.arange(1,101), ta*100, tk, 100)) - # nbd_vec.append(nbinom.pmf(np.arange(1,101), ta*100, tk)) - # binm_vec.append(stats.binom.pmf(np.arange(1,101), 100, ta)) - - # descrip.append("a=%s, k=%s" % (ta, tk)) - - # # Loop through the data and plot it. - # for i in xrange(len(fnbd_vec)): - # plt.clf() - # plt.plot(np.arange(1,101), fnbd_vec[i]) - # plt.plot(np.arange(1,101), nbd_vec[i], '--') - # plt.plot(np.arange(1,101), binm_vec[i], '.-') - # plt.legend(('fnbd', 'nbd', 'binm'), loc='best') - # plt.xlabel('abundance') - # plt.ylabel('P(x)') - # plt.ylim((0, .12)) - # plt.text(plt.xlim()[1] * 0.6, plt.ylim()[1] * 0.8, descrip[i]) - # plt.show() - # plt.clf() + def test_pmf(self): + # Test pmf sums to one + pmf = cnbinom.pmf(np.arange(0, 101), 20, 1, 100) + assert_almost_equal(np.sum(pmf), 1) + def test_cdf(self): + # Test cdf is one at appropriate value + cdf = cnbinom.cdf(100, 20, 1, 100) + assert_almost_equal(cdf, 1) + + def test_fit_of_vector(self): + # Test fit of vector from Issue #3 (github.com/jkitzes/macroeco) + data = np.array([3,2,1,0,0,0,0,0,0,0,0,0,0,0,0]) + k_fit = cnbinom.fit_mle(data)[0] + assert_equal(False, k_fit == -0.26) + + def test_zillio_plots(self): + """ Test the cnbinom function replicated the Zillio and He plots + + References + ---------- + Zillio, T and He, F. 2010. Modeling spatial aggregation of finite + populations. 
Ecology, 91, 3698-3706 + + """ + + # Define Preliminary a and k to test + a = np.array([0.1, .3, .8]) + k = np.array([.1, 1, 10]) + fnbd_vec = [] + nbd_vec = [] + binm_vec = [] + descrip = [] + + # Get data + for ta in a: + for tk in k: + + fnbd_vec.append(cnbinom.pmf(np.arange(1, 101), + ta * 100, tk, 100)) + nbd_vec.append(nbinom.pmf(np.arange(1, 101), ta * 100, tk)) + binm_vec.append(stats.binom.pmf(np.arange(1, 101), 100, ta)) + + descrip.append("a=%s, k=%s" % (ta, tk)) + + # Loop through the data and plot it + fig, axes = plt.subplots(3, 3, sharex=True) + axes = axes.flatten() + + for i, ax in enumerate(axes): + ax.plot(np.arange(1, 101), fnbd_vec[i]) + ax.plot(np.arange(1, 101), nbd_vec[i], '--') + ax.plot(np.arange(1, 101), binm_vec[i], '.-') + ax.legend(('fnbd', 'nbd', 'binm'), loc='best') + ax.set_xlabel('abundance') + ax.set_ylabel('P(x)') + ax.text(0.6, 0.3, descrip[i], transform=ax.transAxes) + + plt.tight_layout() + # Uncomment to see save figure + #fig.savefig("test_cbinom") class TestExpon(TestCase): pass From d37499110dae6ab56ff40a1875c2818bfd5e1586 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 3 Apr 2014 08:37:24 -0700 Subject: [PATCH 199/343] Added rank method to nbinom_gen --- macroeco/models/_distributions.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 09f842d..d0edba9 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -405,6 +405,11 @@ def fit_mle(self, data, k_range=(0.1, 100, 0.1)): mu = np.mean(data) return mu, _solve_k_from_mu(data, k_range, nbinom_nll, mu) + @inherit_docstring_from(rv_discrete_meco) + def rank(self, n, *args): + """{0}""" + return self.ppf((np.arange(1, n+1) - 0.5) / n, *args) + def _get_p_from_mu(self, mu, k_agg): return k_agg / (k_agg + mu) From 2fbb2c6ef586a2fbe17c374bba9d859074b0f3af Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 3 Apr 2014 08:38:14 -0700 Subject: [PATCH 200/343] Moved cdf and pmf plotting to end of main --- macroeco/main/main.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 2c7b98a..f4ad9a0 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -575,3 +575,41 @@ def _pad_plot_frame(ax, pad=0.01): ax.set_ylim(ymin - yrange*pad, ymax + yrange*pad) return ax + + +def _output_cdf_plot(core_result, spid, models, options, fit_results): + """Function for plotting cdf""" + + # CDF + x = core_result['y'].values + df = emp.empirical_cdf(x) + df.columns = ['x', 'empirical'] + + def calc_func(model, df, shapes): + return eval("mod.%s.cdf(df['x'], *shapes)" % model) + + plot_exec_str = "ax.step(df['x'], emp, color='k', lw=3);ax.set_ylim(top=1)" + + _save_table_and_plot(spid, models, options, fit_results, 'data_pred_cdf', + df, calc_func, plot_exec_str) + + +def output_pdf_plot(core_result, spid, models, options, fit_results): + """ Function for plotting pdf/pmf """ + # PDF/PMF + hist_bins = 11 + emp_hist, edges = np.histogram(core_result['y'].values, hist_bins, + normed=True) + x = (np.array(edges[:-1]) + np.array(edges[1:])) / 2 + df = pd.DataFrame({'x': x, 'empirical': emp_hist}) + + def calc_func(model, df, shapes): + try: + return eval("mod.%s.pmf(np.floor(df['x']), *shapes)" % model) + except: + return eval("mod.%s.pdf(df['x'], *shapes)" % model) + + plot_exec_str = "ax.bar(df['x']-width/2, emp, width=width, color='gray')" + + _save_table_and_plot(spid, models, options, fit_results, 
'data_pred_pdf', + df, calc_func, plot_exec_str) From 4ec0bea2e172fa4adde807d5872d11fa29c7bf87 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 3 Apr 2014 08:38:35 -0700 Subject: [PATCH 201/343] Added residual plotting string --- macroeco/main/main.py | 49 ++++++++++++++----------------------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index f4ad9a0..ec77795 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -486,48 +486,31 @@ def _write_comparison_plots_tables(spid, models, options, core_results, df.insert(0, 'x', x) def calc_func(model, df, shapes): - return eval("mod.%s.ppf((df['x']-0.5)/len(df), *shapes)" % model)[::-1] + return eval("mod.%s.rank(len(df['x']), *shapes)" % model)[::-1] - plot_exec_str="ax.scatter(df['x'], emp, color='k');ax.set_yscale('log')" + plot_exec_str = "ax.scatter(df['x'], emp, color='k');ax.set_yscale('log')"\ + + ";ax.set_xlabel('rank');ax.set_ylabel('abundance')" - _save_table_and_plot(spid, models, options, fit_results, 'data_pred_rad', - df, calc_func, plot_exec_str) - - # CDF - x = core_result['y'].values - df = emp.empirical_cdf(x) - df.columns = ['x', 'empirical'] - - def calc_func(model, df, shapes): - return eval("mod.%s.cdf(df['x'], *shapes)" % model) - - plot_exec_str = "ax.step(df['x'], emp, color='k', lw=3);ax.set_ylim(top=1)" + plot_exec_str_resid = \ + "ax.hlines(0, 1, np.max(df['x']));" + \ + "ax.set_ylim((-1 * np.max(emp), np.max(emp)));" + \ + "ax.set_xlabel('rank');ax.set_ylabel('residual')" - _save_table_and_plot(spid, models, options, fit_results, 'data_pred_cdf', - df, calc_func, plot_exec_str) - - # PDF/PMF - hist_bins = 11 - emp_hist, edges = np.histogram(core_result['y'].values, hist_bins, - normed=True) - x = (np.array(edges[:-1]) + np.array(edges[1:])) / 2 - df = pd.DataFrame({'x': x, 'empirical': emp_hist}) - - def calc_func(model, df, shapes): - try: - return eval("mod.%s.pmf(np.floor(df['x']), *shapes)" % model) - except: - return eval("mod.%s.pdf(df['x'], *shapes)" % model) - - plot_exec_str = "ax.bar(df['x']-width/2, emp, width=width, color='gray')" + _save_table_and_plot(spid, models, options, fit_results, 'data_pred_rad', + df, calc_func, [plot_exec_str, plot_exec_str_resid]) - _save_table_and_plot(spid, models, options, fit_results, 'data_pred_pdf', - df, calc_func, plot_exec_str) def _save_table_and_plot(spid, models, options, fit_results, name, df, calc_func, plot_exec_str): + """ + Saves plot and tables of core result and residuals + plot_exec_str : list + List of strings to be executed when plotting. 1st string should be + core result plotting and second string should be residual plotting + + """ f_path = _get_file_path(spid, options, '%s.csv' % name) p_path = _get_file_path(spid, options, '%s.pdf' % name) From 99b34485b19c314db91d487e792c5a65a95271ae Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 3 Apr 2014 08:38:57 -0700 Subject: [PATCH 202/343] Reformatted figure to take two subplots --- macroeco/main/main.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index ec77795..e09ce12 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -526,28 +526,32 @@ def _save_table_and_plot(spid, models, options, fit_results, name, df, # returned in the empirical calculation. 
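An aside on the rank prediction used in the patch above: it follows the usual quantile recipe, evaluating the fitted distribution's ppf at the midpoints (i - 0.5) / n and comparing against the sorted data. Self-contained sketch with an invented geometric fit:

import numpy as np
import scipy.stats as stats

emp_rad = np.sort(np.array([10, 5, 5, 2, 1, 1]))[::-1]            # observed RAD
n = len(emp_rad)
pred_rad = stats.geom.ppf((np.arange(1, n + 1) - 0.5) / n, 0.25)[::-1]
residual = pred_rad - emp_rad                                      # plotted against rank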
df_plt = df.set_index('x') # Figure emp = df_plt['empirical'] - df_plt = df_plt.drop('empirical',1) - try: - width = df['x'].values[1] - df['x'].values[0] - except: - width = 1 - ax = df_plt.plot(lw=3) - exec plot_exec_str - ax = _pad_plot_frame(ax) - fig = ax.get_figure() + # Make axes + fig, axes = plt.subplots(1, len(plot_exec_str), figsize=(12, 5)) + axes = axes.flatten() + + names = [models, [mod + "_residual" for mod in models]] + for i, ax in enumerate(axes): + + ax.plot(df['x'], df_plt[names[i]]) + exec plot_exec_str[i] + ax.legend(names[i] + ['empirical']) + ax = _pad_plot_frame(ax, left=ax.get_xlim()[0], + bottom=ax.get_ylim()[0]) + fig.savefig(p_path) plt.close('all') -def _pad_plot_frame(ax, pad=0.01): +def _pad_plot_frame(ax, pad=0.01, left=0, bottom=0): """ Provides padding on sides of frame equal to pad fraction of plot """ - ax.set_xlim(left=0) - ax.set_ylim(bottom=0) + ax.set_xlim(left=left) + ax.set_ylim(bottom=bottom) xmin, xmax = ax.get_xlim() ymin, ymax = ax.get_ylim() From 5e3c084d4c5dbc1fbe116e2092862ee5fc78f12b Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 3 Apr 2014 08:39:24 -0700 Subject: [PATCH 203/343] Added residual calculation --- macroeco/main/main.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index e09ce12..d98bb4f 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -468,13 +468,8 @@ def _write_comparison_plots_tables(spid, models, options, core_results, Only applies to analysis using functions from empirical in which models are also given. - - pdf/pmf vs histogram - - cdf vs emp cdf - rad vs rad """ - # TODO: Implement more general function for RAD (model.rank) - # TODO: Write only RAD, remove rest - # TODO: Add second plot to RAD showing residual core_result = core_results[spid][1] n_vals = len(core_result) @@ -519,6 +514,7 @@ def _save_table_and_plot(spid, models, options, fit_results, name, df, shapes = fit_result[0] result = calc_func(model, df, shapes) df[model] = result + df[model + "_residual"] = np.sort(df['empirical'])[::-1] - result df.to_csv(f_path, index=False, float_format='%.4f') # Table From c67b86c9eb4fa712cb72ec9d78f47cc3eed64554 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 3 Apr 2014 16:42:03 -0700 Subject: [PATCH 204/343] Refactor distribution plot function --- macroeco/main/main.py | 118 +++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 66 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index d98bb4f..ee0b16e 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -11,8 +11,9 @@ import numpy as np import pandas as pd -import matplotlib.pyplot as plt import matplotlib as mpl +mpl.use('Agg') +import matplotlib.pyplot as plt from .. __init__ import __version__ from .. 
import empirical as emp @@ -373,13 +374,17 @@ def _save_results(options, module, core_results, fit_results): if module == 'emp': _write_subset_index_file(options, core_results) - if fit_results: # If models given - for i, core_result in enumerate(core_results): - models = options['models'].replace(' ','').split(';') - _write_fitted_params(i, models, options, fit_results) - _write_test_statistics(i, models, options, fit_results) - _write_comparison_plots_tables(i, models, options, - core_results, fit_results) + # Write model/data comparison if models were given + if fit_results: + models = options['models'].replace(' ','').split(';') + if 'x' in core_results[0][1]: # If df has an x col, curve + pass + else: # distribution + for i, core_result in enumerate(core_results): + _write_fitted_params(i, models, options, fit_results) + _write_test_statistics(i, models, options, fit_results) + _write_distribution_plot_table(i, models, options, + core_results, fit_results) def _write_core_tables(options, module, core_results): @@ -460,7 +465,7 @@ def _write_test_statistics(spid, models, options, fit_results): f.close() -def _write_comparison_plots_tables(spid, models, options, core_results, +def _write_distribution_plot_table(spid, models, options, core_results, fit_results): """ Notes @@ -468,94 +473,75 @@ def _write_comparison_plots_tables(spid, models, options, core_results, Only applies to analysis using functions from empirical in which models are also given. - - rad vs rad """ core_result = core_results[spid][1] n_vals = len(core_result) - # RAD + # Set x (rank) and y in df x = np.arange(n_vals) + 1 df = core_result.sort(columns='y', ascending=False) df.rename(columns={'y': 'empirical'}, inplace=True) df.insert(0, 'x', x) - def calc_func(model, df, shapes): - return eval("mod.%s.rank(len(df['x']), *shapes)" % model)[::-1] - - plot_exec_str = "ax.scatter(df['x'], emp, color='k');ax.set_yscale('log')"\ - + ";ax.set_xlabel('rank');ax.set_ylabel('abundance')" - - plot_exec_str_resid = \ - "ax.hlines(0, 1, np.max(df['x']));" + \ - "ax.set_ylim((-1 * np.max(emp), np.max(emp)));" + \ - "ax.set_xlabel('rank');ax.set_ylabel('residual')" - - _save_table_and_plot(spid, models, options, fit_results, 'data_pred_rad', - df, calc_func, [plot_exec_str, plot_exec_str_resid]) - - - -def _save_table_and_plot(spid, models, options, fit_results, name, df, - calc_func, plot_exec_str): - """ - Saves plot and tables of core result and residuals - - plot_exec_str : list - List of strings to be executed when plotting. 1st string should be - core result plotting and second string should be residual plotting - - """ - f_path = _get_file_path(spid, options, '%s.csv' % name) - p_path = _get_file_path(spid, options, '%s.pdf' % name) - + # Add residual column for each model for model in models: fit_result = fit_results[spid][model] shapes = fit_result[0] - result = calc_func(model, df, shapes) + result = eval("mod.%s.rank(len(df['x']), *shapes)" % model)[::-1] df[model] = result - df[model + "_residual"] = np.sort(df['empirical'])[::-1] - result + df[model + "_residual"] = result - df['empirical'] - df.to_csv(f_path, index=False, float_format='%.4f') # Table - - # TODO: We only want x and models here, not any other cols that might be - # returned in the empirical calculation. 
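For orientation, the comparison table written out here ends up with a rank column, the empirical values, and a value plus residual column per fitted model; a toy version of that layout (numbers and the 'geom' model name are invented):

import numpy as np
import pandas as pd

emp_vals = np.array([10, 5, 2, 1])
pred_vals = np.array([8.0, 4.5, 2.5, 1.2])          # hypothetical fitted ranks
df = pd.DataFrame({'x': np.arange(1, 5), 'empirical': emp_vals})
df['geom'] = pred_vals
df['geom_residual'] = df['geom'] - df['empirical']
df.to_csv('rank_data_model.csv', index=False, float_format='%.4f')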
- df_plt = df.set_index('x') # Figure - emp = df_plt['empirical'] + # Set up file paths + f_path = _get_file_path(spid, options, 'rank_data_model.csv') + p_path = _get_file_path(spid, options, 'rank_data_model.pdf') - # Make axes - fig, axes = plt.subplots(1, len(plot_exec_str), figsize=(12, 5)) - axes = axes.flatten() + # Save table + df.to_csv(f_path, index=False, float_format='%.4f') # Table - names = [models, [mod + "_residual" for mod in models]] - for i, ax in enumerate(axes): + # Save plot + fig, (ax1, ax2) = plt.subplots(1, 2) + + ax1.plot(df['x'], df[models]) + ax1.scatter(df['x'], df['empirical'], color='k') + ax1.legend(models + ['empirical']) + ax1.set_xlabel('rank') + ax1.set_ylabel('value') + ax1.set_xlim(left=0) + + ax2.plot(df['x'], df[[x + '_residual' for x in models]]) + ax2.hlines(0, 1, np.max(df['x'])) + ax2.legend(models + ['empirical']) + ax2.set_ylim((-1 * np.max(df['empirical']), np.max(df['empirical']))) + ax2.set_xlabel('rank') + ax2.set_ylabel('residual') + + if options.get('log_rank', None): + ax1.set_yscale('log') + else: + ax1.set_ylim(bottom=0) - ax.plot(df['x'], df_plt[names[i]]) - exec plot_exec_str[i] - ax.legend(names[i] + ['empirical']) - ax = _pad_plot_frame(ax, left=ax.get_xlim()[0], - bottom=ax.get_ylim()[0]) + ax1 = _pad_plot_frame(ax1) + ax2 = _pad_plot_frame(ax2) + fig.tight_layout() fig.savefig(p_path) plt.close('all') -def _pad_plot_frame(ax, pad=0.01, left=0, bottom=0): +def _pad_plot_frame(ax, pad=0.01): """ Provides padding on sides of frame equal to pad fraction of plot """ - ax.set_xlim(left=left) - ax.set_ylim(bottom=bottom) - xmin, xmax = ax.get_xlim() ymin, ymax = ax.get_ylim() - xrange = xmax - xmin - yrange = ymax - ymin + xr = xmax - xmin + yr = ymax - ymin - ax.set_xlim(xmin - xrange*pad, xmax + xrange*pad) - ax.set_ylim(ymin - yrange*pad, ymax + yrange*pad) + ax.set_xlim(xmin - xr*pad, xmax + xr*pad) + ax.set_ylim(ymin - yr*pad, ymax + yr*pad) return ax From 3ec1a35165a21ff04674f3cddbb97f70c5328a5d Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 3 Apr 2014 17:01:24 -0700 Subject: [PATCH 205/343] Fix inheritance of nbinom (tests still pass), closes #64 --- macroeco/models/_distributions.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index d0edba9..7ff7f21 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -356,7 +356,7 @@ def p_eq(x, mu, b): _geom_solve_p_from_mu_vect = np.vectorize(_geom_solve_p_from_mu) -class nbinom_gen(spdist.nbinom_gen): +class nbinom_gen(rv_discrete_meco): r""" A negative binomial discrete random variable. 
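As a reminder of how the maximum likelihood k_agg is obtained in these distributions: the estimate is found by brute force, minimising the negative log-likelihood over a coarse grid of k values with p = k / (k + mu). Sketch only; the data and grid below are invented:

import numpy as np
import scipy.stats as stats

data = np.array([0, 1, 1, 2, 3, 5, 8, 13])
mu = np.mean(data)
k_grid = np.arange(0.1, 100, 0.1)
nll = [-np.sum(stats.nbinom.logpmf(data, k, k / (k + mu))) for k in k_grid]
k_mle = k_grid[np.argmin(nll)]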
@@ -405,11 +405,6 @@ def fit_mle(self, data, k_range=(0.1, 100, 0.1)): mu = np.mean(data) return mu, _solve_k_from_mu(data, k_range, nbinom_nll, mu) - @inherit_docstring_from(rv_discrete_meco) - def rank(self, n, *args): - """{0}""" - return self.ppf((np.arange(1, n+1) - 0.5) / n, *args) - def _get_p_from_mu(self, mu, k_agg): return k_agg / (k_agg + mu) From 8bab18a8c20c47ea137a3c5283f56e86a67515fa Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 3 Apr 2014 22:31:21 -0700 Subject: [PATCH 206/343] Added documentation and changed columnar to stacked --- macroeco/misc/format_data.py | 59 +++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/macroeco/misc/format_data.py b/macroeco/misc/format_data.py index a093789..0a58f45 100644 --- a/macroeco/misc/format_data.py +++ b/macroeco/misc/format_data.py @@ -1,9 +1,6 @@ import numpy as np import pandas as pd -# TODO: docstring inheritance - - def data_read_write(data_path_in, data_path_out, format_type, **kwargs): """ General function to read, format, and write data. @@ -19,7 +16,6 @@ def data_read_write(data_path_in, data_path_out, format_type, **kwargs): kwargs Specific keyword args for given data types. See Notes - Notes ----- @@ -55,7 +51,7 @@ def data_read_write(data_path_in, data_path_out, format_type, **kwargs): elif format_type == "grid": pass - elif format_type == "columnar": + elif format_type == "stacked": pass elif format_type == "transect": pass @@ -64,21 +60,17 @@ def data_read_write(data_path_in, data_path_out, format_type, **kwargs): form_data.to_csv(data_path_out, index=False) -def format_columnar(): - """ - """ - pass def format_dense(base_data, non_label_cols, **kwargs): """ - Formats dense data type to columnar data type. + Formats dense data type to stacked data type. Takes in a dense data type and converts into a stacked data type. Parameters ---------- - data: DataFrame + data : DataFrame The dense data non_label_cols : list A list of columns in the data that are not label columns @@ -91,13 +83,45 @@ def format_dense(base_data, non_label_cols, **kwargs): drop_na : bool Drop all columns with nan in the dataset. Default, False - Notes - ----- - Examples of Dense Data conversion...TODO + Returns + ------- + : DataFrame + A formatted DataFrame in the stacked format + Notes + ----- + Example of Dense Data conversion + + >>> import pandas as pd + >>> dense_data = pd.DataFrame({'row' : [1,2,1,2], 'column' : [1,1,2,2], + 'labelA': [1,0,3,4], 'labelB' : [3,2,1,4]}) + + >>> dense_data + column labelA labelB row + 0 1 1 3 1 + 1 1 0 2 2 + 2 2 3 1 1 + 3 2 4 4 2 + + [4 rows x 4 columns] + >>> stacked_data = format_dense(dense_data, ['row', 'column']) + >>> stacked_data + row column label count + 0 1 1 labelA 1 + 1 1 1 labelB 3 + 2 2 1 labelA 0 + 3 2 1 labelB 2 + 4 1 2 labelA 3 + 5 1 2 labelB 1 + 6 2 2 labelA 4 + 7 2 2 labelB 4 + + [8 rows x 4 columns] """ + kwargs = _set_dense_defaults_and_eval(kwargs) + # Stack data in columnar form. indexed_data = base_data.set_index(keys=non_label_cols) columnar_data = indexed_data.stack(dropna=False) @@ -124,7 +148,7 @@ def _set_dense_defaults_and_eval(kwargs): """ Sets default values in kwargs if kwargs are not already given. 
- Evaluates all results incase some arguments are given as string + Evaluates all values using eval Parameters ----------- @@ -153,6 +177,11 @@ def _set_dense_defaults_and_eval(kwargs): return kwargs +def format_stacked(): + """ + """ + pass + def format_transect(): """ """ From ea9391e868451795de52cb4170c48bcaf2a685fa Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Mon, 7 Apr 2014 23:00:29 -0700 Subject: [PATCH 207/343] Added logser_uptrunc to docs --- macroeco/models/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py index 0c8e02d..fe9033a 100644 --- a/macroeco/models/__init__.py +++ b/macroeco/models/__init__.py @@ -31,6 +31,7 @@ geom_uptrunc nbinom cnbinom + logser_uptrunc .. DV: Our public-facing distributions do not use location and scale parameters, as @@ -38,6 +39,6 @@ """ from _distributions import (geom, geom_uptrunc, nbinom, cnbinom, - expon, expon_uptrunc) + logser_uptrunc, expon, expon_uptrunc) from ._curves import (power_law) From d482e0110277a7eeb89ce6fbeb45f0852fac75a4 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Mon, 7 Apr 2014 23:01:02 -0700 Subject: [PATCH 208/343] Added logser_uptrunc to models --- macroeco/models/_distributions.py | 146 +++++++++++++++++++++++++++++- 1 file changed, 143 insertions(+), 3 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 7ff7f21..15a2b4b 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -1,4 +1,5 @@ from __future__ import division +import sys from decimal import Decimal import numpy as np @@ -6,7 +7,7 @@ from scipy.stats.distributions import (rv_discrete, rv_continuous, docdict, docdict_discrete) -import scipy.stats.distributions as spdist +import scipy.stats as stats import scipy.optimize as optim import scipy.special as special @@ -461,7 +462,7 @@ class cnbinom_gen(rv_discrete_meco): r""" The conditional negative binomial random variable - This distribution was described by Zillio and He (2010) [#]_ and Conlisk + This distribution was described by Zillio and He (2010) [#]_ and Conlisk et al. (2007) [#]_ .. math:: @@ -474,7 +475,7 @@ class cnbinom_gen(rv_discrete_meco): Methods ------- - translate_args(mu, k_agg) + translate_args(mu, k_agg, b) not used, returns mu, k_agg, and b. fit_mle(data, k_range=(0.1,100,0.1)) ml estimate of shape parameters mu and k_agg given data, with k_agg evaluated at (min, max, step) values given by k_range. @@ -551,6 +552,145 @@ def _ln_choose(n, k_agg): return gammaln(n + 1) - (gammaln(k_agg + 1) + gammaln(n - k_agg + 1)) +class logser_uptrunc_gen(rv_discrete_meco): + r""" + Upper truncated logseries random variable + + This distribuiton was described by Harte (2011) [#]_ + + .. math:: + + p(x) = \frac{1}{Z} \frac{p^n}{n} + + where ``Z`` is the normalizing factor + + Methods + ------- + translate_args(mu, b) + Translates the mean and the upper bound into p and b. + fit_mle(data) + ml estimate of shape parameter p + %(before_notes)s + p : float + p parameter of the logseries distribution + b : float + Upper bound of the distribution + + + Notes + ----- + Code adapted from Ethan White's macroecology_tools and version 0.1 of + macroeco + + References + ----------- + .. [#] + Harte, J. (2011). Maximum Entropy and Ecology: A Theory of + Abundance, Distribution, and Energetics. Oxford, United + Kingdom: Oxford University Press. 
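As a quick sanity check on the distribution defined above, the pmf can be written with the normalizing factor expanded explicitly as Z = sum over n = 1..b of p**n / n. The snippet below is a hand-rolled sketch of that formula, not the logser_uptrunc class itself:

import numpy as np

def logser_uptrunc_pmf(n, p, b):
    # Upper-truncated logseries: p(n) = (1/Z) * p**n / n for n = 1, ..., b
    support = np.arange(1, b + 1)
    Z = np.sum(p ** support / support)  # normalizing factor
    return (p ** n / n) / Z

p, b = 0.8, 100
support = np.arange(1, b + 1)
pmf = logser_uptrunc_pmf(support, p, b)
print(pmf.sum())              # ~1.0, the pmf is normalized over 1..b
print(np.sum(support * pmf))  # mean of the truncated distribution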
+ + + """ + + @inherit_docstring_from(rv_discrete_meco) + def translate_args(self, mu, b): + return _trunc_logser_solver((1 / mu) * b, b), b + + @inherit_docstring_from(rv_discrete_meco) + def fit_mle(self, data, b=None): + + data = np.array(data) + length = len(data) + + if not b: + b = np.sum(data) + + return _trunc_logser_solver(length, b), b + + def _pmf(self, x, p, b): + x = np.array(x) + + if p[0] > 0: + pmf = stats.logser.pmf(x, p) / stats.logser.cdf(b, p) + else: + ivals = np.arange(1, b[0] + 1) + normalization = sum(p[0] ** ivals / ivals) + pmf = (p[0] ** x / x) / normalization + + return pmf + + def _cdf(self, x, p, b): + x = np.array(x) + if p[0] < 1: + return stats.logser.cdf(x, p) / stats.logser.cdf(b, p) + else: + cdf_list = [sum(self.pmf(range(1, int(x_i) + 1), p[0], b[0])) for + x_i in x] + return np.array(cdf_list) + + def _rvs(self, p, b): + # Code from weecology/macroecotools + + out = [] + if p < 1: + for i in range(self._size): + rand_logser = stats.logser.rvs(p) + while rand_logser > b: + rand_logser = stats.logser.rvs(p) + out.append(rand_logser) + else: + rand_list = stats.uniform.rvs(size = self._size) + for rand_num in rand_list: + y = lambda x: self.cdf(x, p, b) - rand_num + if y(1) > 0: out.append(1) + else: out.append(int(round(bisect(y, 1, b)))) + return np.array(out) + + def _stats(self, p, b): + vals = np.arange(1, b + 1) + mu = np.sum(vals * self.pmf(vals, p, b)) + var = np.sum(vals ** 2 * self.pmf(vals, p, b)) - mu ** 2 + return mu, var, None, None + + +logser_uptrunc = logser_uptrunc_gen(name="logser_uptrunc", shapes="p, b") + + +def _trunc_logser_solver(bins, b): + """ + Given bins (S) and b (N) solve for MLE of truncated logseries + parameter p + + Parameters + ----------- + bins : float + Number of bins. Considered S in an ecological context + b : float + Upper truncation of distribution + + Returns + ------- + : float + MLE estimate of p + + Notes + ------ + Adapted from Ethan White's macroecology_tools + """ + + if bins == b: + p = 0 + + else: + BOUNDS = [0, 1] + DIST_FROM_BOUND = 10 ** -15 + m = np.array(np.arange(1, np.int(b) + 1)) + y = lambda x: np.sum(x ** m / b * bins) - np.sum((x ** m) / m) + p = optim.bisect(y, BOUNDS[0] + DIST_FROM_BOUND, + min((sys.float_info[0] / bins) ** (1 / b), 2), + xtol=1.490116e-08) + return p + # # Continuous # From d5e960a5922505c717627cd54f75a9f81816f2d1 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Mon, 7 Apr 2014 23:01:15 -0700 Subject: [PATCH 209/343] Unittested logser_uptrunc --- macroeco/models/test_distributions.py | 75 +++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index 125c640..c4b39d8 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -12,6 +12,7 @@ import numpy as np from decimal import Decimal from macroeco.models import * +from macroeco.models._distributions import _trunc_logser_solver import matplotlib.pyplot as plt import scipy.stats as stats @@ -209,6 +210,80 @@ def test_zillio_plots(self): # Uncomment to see save figure #fig.savefig("test_cbinom") + +class TestLogserUptrunc(TestCase): + + def test_pmf(self): + # import macroeco_distributions as mac + # mac.trunc_logser(.8, 100).pmf(4) + test_val = logser_uptrunc(.8, 100).pmf(4) + assert_almost_equal(test_val, 0.063624697299) + + # import macroeco_distributions as mac + # mac.trunc_logser(.45, 3).pmf(3) + test_val = logser_uptrunc(.45, 3).pmf(3) + assert_almost_equal(test_val, 
0.052224371373307543) + + def test_cdf(self): + # import macroeco_distributions as mac + # mac.trunc_logser(.8, 100).cdf(4) + test_val = logser_uptrunc(.8, 100).cdf(4) + assert_almost_equal(test_val, 0.86556098617469057) + + # import macroeco_distributions as mac + # mac.trunc_logser(.45, 3).cdf(2) + test_val = logser_uptrunc(.45, 3).cdf(2) + assert_array_almost_equal(test_val, 0.9477756286266924) + + def test_mean(self): + # Expected mean is N / S + + N = 500 + S = 30. + p = logser_uptrunc.translate_args(N / S, N)[0] + mean = logser_uptrunc.stats(p, N)[0] + assert_almost_equal(mean, N / S, decimal=5) + + def test_fit_mle(self): + # Should return same result as translate args + data = np.arange(1, 40) + N = np.sum(data) + S = len(data) + + fits = logser_uptrunc.fit_mle(data) + assert_array_almost_equal(fits, + logser_uptrunc.translate_args(N / S, N), + decimal=5) + + def test_translate_args(self): + # Test that values equal values from John's book (Harte 2011) + + lg = logser_uptrunc.translate_args(4 * 4 / 4, 4 * 4)[0] + assert_almost_equal(-np.log(lg), 0.0459, decimal=4) + + lg = logser_uptrunc.translate_args(2 ** 4 * 4 / 4, 2 ** 4 * 4)[0] + assert_almost_equal(-np.log(lg), -0.00884, decimal=5) + + lg = logser_uptrunc.translate_args(2 ** 8 * 4 / 4, 2 ** 8 * 4)[0] + assert_almost_equal(-np.log(lg), -0.00161, decimal=5) + + lg = logser_uptrunc.translate_args(2 ** 8 * 16 / 16, 2 ** 8 * 16)[0] + assert_almost_equal(-np.log(lg), 0.000413, decimal=6) + + lg = logser_uptrunc.translate_args(2 ** 12 * 64 / 64, 2 ** 12 * 64)[0] + assert_almost_equal(-np.log(lg), 0.0000228, decimal=7) + + lg = logser_uptrunc.translate_args(20 / 20, 20)[0] + assert_equal(0, 0) + + + def test_n_close_to_s(self): + # Test the solver doesn't fail when N is very close to S + + _trunc_logser_solver(2, 3) + _trunc_logser_solver(3, 4) + _trunc_logser_solver(100, 101) + class TestExpon(TestCase): pass From 1fab659fd0bfae9cc9bff0904ac9dc73d12f2a7a Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 8 Apr 2014 12:03:54 -0700 Subject: [PATCH 210/343] Comment whitespace fix --- macroeco/models/test_distributions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index c4b39d8..af98128 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -206,9 +206,9 @@ def test_zillio_plots(self): ax.set_ylabel('P(x)') ax.text(0.6, 0.3, descrip[i], transform=ax.transAxes) - plt.tight_layout() + # plt.tight_layout() # Uncomment to see save figure - #fig.savefig("test_cbinom") + # fig.savefig("test_cbinom") class TestLogserUptrunc(TestCase): From a14b51dcc19c0f4973477591433262f5a406a142 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 8 Apr 2014 14:32:04 -0700 Subject: [PATCH 211/343] Fix fit_lsq --- macroeco/models/_curves.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/macroeco/models/_curves.py b/macroeco/models/_curves.py index 532f5a6..a8178c1 100644 --- a/macroeco/models/_curves.py +++ b/macroeco/models/_curves.py @@ -87,12 +87,12 @@ def fit_lsq(self, x, y_obs, params_start=None): x = np.array(x) y_obs = np.array(y_obs) if not params_start: - params_start = np.ones(len(self.parameters)) + params_start = np.ones(self.n_parameters) # Error checking - if len(x) != len(y): + if len(x) != len(y_obs): raise ValueError, "x and y_obs must be the same length" - if len(params) != self.n_parameters: + if len(params_start) != self.n_parameters: raise ValueError, 
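The fit_lsq repair being completed in this hunk follows the standard scipy.optimize.leastsq pattern: define a residuals function and minimize it from a vector of starting parameters. A minimal standalone sketch of that pattern, using an assumed two-parameter power law rather than the package's own curve classes:

import numpy as np
from scipy.optimize import leastsq

def power_law(x, c, z):
    # Assumed two-parameter curve, y = c * x**z
    return c * x ** z

def residuals(params, x, y_obs):
    # leastsq minimizes the sum of squared residuals returned here
    return y_obs - power_law(x, *params)

x = np.arange(1, 20, dtype=float)
y_obs = 2.5 * x ** 0.3 + np.random.normal(0, 0.01, len(x))

params_start = np.ones(2)
params_fit, ier = leastsq(residuals, params_start, args=(x, y_obs))
if ier not in (1, 2, 3, 4):
    raise ValueError("least squares fit did not converge")
print(tuple(params_fit))  # roughly (2.5, 0.3)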
"Incorrect number of values in params_start" # Calculate fit @@ -108,7 +108,7 @@ def residuals(params, x, y_obs): raise ValueError, ("Least squares fit did not converge with " "message %s" % msg) - return params_fit + return tuple(params_fit) class power_law_gen(curve): From cc46d08715f085b9cafb9db11b16d3053d0f051c Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 8 Apr 2014 14:32:53 -0700 Subject: [PATCH 212/343] Remove broadcast from sum of sq, set r_squared log trans False by default --- macroeco/compare/compare.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/macroeco/compare/compare.py b/macroeco/compare/compare.py index 36932ee..a182b0c 100644 --- a/macroeco/compare/compare.py +++ b/macroeco/compare/compare.py @@ -196,12 +196,12 @@ def sum_of_squares(obs, pred): float Sum of squares """ - obs, pred = tuple(np.broadcast_arrays(obs, pred)) + #obs, pred = tuple(np.broadcast_arrays(obs, pred)) return np.sum((np.array(obs) - np.array(pred)) ** 2) @doc_sub(_obs_pred_doc) -def r_squared(obs, pred, one_to_one=False, log_trans=True): +def r_squared(obs, pred, one_to_one=False, log_trans=False): """ Get's the R^2 value for a regression of observed (X) and predicted (Y) data From d58c1cd1848a35bedf1e7fc9ae2a8cdc3f7520e0 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 8 Apr 2014 14:33:29 -0700 Subject: [PATCH 213/343] Hide warning when using tight_layout --- macroeco/main/main.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index ee0b16e..c91c53a 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -2,6 +2,7 @@ import sys import os import shutil +import warnings import inspect import configparser import threading as thread @@ -12,7 +13,6 @@ import pandas as pd import matplotlib as mpl -mpl.use('Agg') import matplotlib.pyplot as plt from .. 
__init__ import __version__ @@ -524,7 +524,9 @@ def _write_distribution_plot_table(spid, models, options, core_results, ax1 = _pad_plot_frame(ax1) ax2 = _pad_plot_frame(ax2) - fig.tight_layout() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + fig.tight_layout() fig.savefig(p_path) plt.close('all') From 5f04f03462b508d8a1908be8d11056b0936a5075 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 8 Apr 2014 14:34:26 -0700 Subject: [PATCH 214/343] Make main handle empirical curves, closes #54 --- macroeco/main/main.py | 115 +++++++++++++++++++++++++----------------- 1 file changed, 70 insertions(+), 45 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index c91c53a..4e3feaf 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -310,39 +310,57 @@ def _fit_models(options, core_results): for core_result in core_results: # Each subset fit_result = {} for model in models: - data = core_result[1]['y'].values - fits = _get_fits(data, model) - # TODO: values is probably better moved to output part - values = _get_values(data, model, fits) - - stat_names, stats = _get_comparison_statistic(data, model, fits) + fits = _get_fits(core_result, model) + values = _get_values(core_result, model, fits) + stat_names, stats = _get_comparison_stat(core_result, values, + model, fits) fit_result[model] = [fits, values, stat_names, stats] fit_results.append(fit_result) return fit_results -def _get_fits(data, model): - return eval("mod.%s.fit_mle(data)" % model) +def _get_fits(core_result, model): + + y = core_result[1]['y'].values + try: + result = eval("mod.%s.fit_mle(y)" % model) + except: + x = core_result[1]['x'].values + result = eval("mod.%s.fit_lsq(x, y)" % model) + return result -def _get_values(data, model, fits): +def _get_values(core_result, model, fits): try: - values = eval("mod.%s.pdf(data, *fits)" % model) - except AttributeError: - values = eval("mod.%s.pmf(data, *fits)" % model) + x = core_result[1]['x'].values # Calc model at x values + values = eval("mod.%s.vals(x, *fits)" % model) except: - pass + x = core_result[1]['y'].values # Calc model at data values + try: + values = eval("mod.%s.pdf(x, *fits)" % model) + except AttributeError: + values = eval("mod.%s.pmf(x, *fits)" % model) return values -def _get_comparison_statistic(data, model, fits): - # Just calculating AIC in this function +def _get_comparison_stat(core_result, values, model, fits): + # Uses AIC for distributions, R2 one-to-one for curves - aic = comp.AIC(data, eval("mod.%s" % model + "(*fits)")) - return ['AIC'], aic + try: # Only curves have vals + eval("mod.%s" % model + ".vals.__doc__") + obs = core_result[1]['y'].values + pred = values['y'].values + name = ['R2'] + stat = comp.r_squared(obs, pred, one_to_one=True) + except AttributeError: + obs = core_result[1]['y'].values + name = ['AIC'] + stat = comp.AIC(obs, eval("mod.%s" % model + "(*fits)")) + + return name, stat def _save_results(options, module, core_results, fit_results): @@ -377,15 +395,11 @@ def _save_results(options, module, core_results, fit_results): # Write model/data comparison if models were given if fit_results: models = options['models'].replace(' ','').split(';') - if 'x' in core_results[0][1]: # If df has an x col, curve - pass - else: # distribution - for i, core_result in enumerate(core_results): - _write_fitted_params(i, models, options, fit_results) - _write_test_statistics(i, models, options, fit_results) - _write_distribution_plot_table(i, models, options, - core_results, fit_results) - + for 
i, core_result in enumerate(core_results): + _write_fitted_params(i, models, options, fit_results) + _write_test_statistics(i, models, options, fit_results) + _write_comparison_plot_table(i, models, options, + core_results, fit_results) def _write_core_tables(options, module, core_results): """ @@ -465,8 +479,8 @@ def _write_test_statistics(spid, models, options, fit_results): f.close() -def _write_distribution_plot_table(spid, models, options, core_results, - fit_results): +def _write_comparison_plot_table(spid, models, options, core_results, + fit_results): """ Notes ----- @@ -475,26 +489,34 @@ def _write_distribution_plot_table(spid, models, options, core_results, """ + is_curve = 'x' in core_results[0][1] core_result = core_results[spid][1] n_vals = len(core_result) - # Set x (rank) and y in df - x = np.arange(n_vals) + 1 - df = core_result.sort(columns='y', ascending=False) + # Set x (given or rank) and y in df + if is_curve: + df = core_result.sort(columns='x') + else: + x = np.arange(n_vals) + 1 + df = core_result.sort(columns='y', ascending=False) + df.insert(0, 'x', x) df.rename(columns={'y': 'empirical'}, inplace=True) - df.insert(0, 'x', x) # Add residual column for each model for model in models: fit_result = fit_results[spid][model] shapes = fit_result[0] - result = eval("mod.%s.rank(len(df['x']), *shapes)" % model)[::-1] + if is_curve: + result = eval("mod.%s.vals(df['x'].values, *shapes)" + % model)['y'].values + else: + result = eval("mod.%s.rank(len(df['x']), *shapes)" % model)[::-1] df[model] = result df[model + "_residual"] = result - df['empirical'] # Set up file paths - f_path = _get_file_path(spid, options, 'rank_data_model.csv') - p_path = _get_file_path(spid, options, 'rank_data_model.pdf') + f_path = _get_file_path(spid, options, 'data_models.csv') + p_path = _get_file_path(spid, options, 'data_models.pdf') # Save table df.to_csv(f_path, index=False, float_format='%.4f') # Table @@ -504,24 +526,27 @@ def _write_distribution_plot_table(spid, models, options, core_results, ax1.plot(df['x'], df[models]) ax1.scatter(df['x'], df['empirical'], color='k') - ax1.legend(models + ['empirical']) - ax1.set_xlabel('rank') + ax1.legend(models + ['empirical'], loc='best') + ax1.set_xlabel('x') ax1.set_ylabel('value') - ax1.set_xlim(left=0) ax2.plot(df['x'], df[[x + '_residual' for x in models]]) - ax2.hlines(0, 1, np.max(df['x'])) - ax2.legend(models + ['empirical']) - ax2.set_ylim((-1 * np.max(df['empirical']), np.max(df['empirical']))) - ax2.set_xlabel('rank') + ax2.hlines(0, np.min(df['x']), np.max(df['x'])) + ax2.legend(models + ['empirical'], loc='best') + #ax2.set_ylim((-1 * np.max(df['empirical']), np.max(df['empirical']))) + ax2.set_xlabel('x') ax2.set_ylabel('residual') - if options.get('log_rank', None): + if options.get('log_y', None): ax1.set_yscale('log') - else: + if options.get('log_x', None): + ax1.set_xscale('log') + + if not options.get('log_x', None) and not options.get('log_y', None): ax1.set_ylim(bottom=0) + ax1.set_xlim(left=0) + ax1 = _pad_plot_frame(ax1) - ax1 = _pad_plot_frame(ax1) ax2 = _pad_plot_frame(ax2) with warnings.catch_warnings(): From db89e565bc4969cfe7edb4fbf0eda17d75bce953 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 8 Apr 2014 14:38:49 -0700 Subject: [PATCH 215/343] Allow for creation of empty patch --- macroeco/empirical/empirical.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index 932ee66..015c11d 100644 --- 
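The comparison table assembled above pairs rank-ordered empirical values with model predictions and stores one residual column per model. A rough sketch of that table with made-up numbers and a single hypothetical model column:

import numpy as np
import pandas as pd

# Hypothetical empirical abundances and one model's rank prediction
empirical = np.array([5, 50, 2, 12, 1, 23], dtype=float)
model_pred = np.array([60.0, 20.0, 10.0, 6.0, 3.0, 1.0])  # already rank-ordered

df = pd.DataFrame({'empirical': np.sort(empirical)[::-1]})  # descending rank
df.insert(0, 'x', np.arange(len(df)) + 1)                   # rank 1, 2, ...

df['model'] = model_pred
df['model_residual'] = df['model'] - df['empirical']
print(df)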
a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -92,7 +92,8 @@ class Patch(object): table : dataframe Table of census data recorded in patch meta : ConfigParser obj - Object similar to dict describing data table, loaded from metadata file at metadata_path and processed by subset + Object similar to dict describing data table, loaded from metadata + file at metadata_path and processed by subset subset : str Subset string passed as parameter @@ -124,11 +125,16 @@ class Patch(object): def __init__(self, metadata_path, subset=''): - self.meta = ConfigParser() - self.meta.read(metadata_path) - self.subset = subset - self.table = self._load_table(metadata_path, - self.meta['Description']['datapath']) + if not metadata_path: # Allow for creation of empty patch + self.meta = None + self.subset = '' + self.table = None + else: + self.meta = ConfigParser() + self.meta.read(metadata_path) + self.subset = subset + self.table = self._load_table(metadata_path, + self.meta['Description']['datapath']) def _load_table(self, metadata_path, data_path): From cc391e9e4d1864f99d1f1a620e57a24a0bf0b8f7 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 8 Apr 2014 14:40:42 -0700 Subject: [PATCH 216/343] Handle case of div tuple to address input from main --- macroeco/empirical/empirical.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index 015c11d..7b574e1 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -455,7 +455,7 @@ def sar(patch, cols, splits, divs): # Loop through all divisions within this split subresultx = [] subresulty = [] - subdivlist = divs.split(';') + subdivlist = _split_divs(divs) for divs in subdivlist: spatial_table = _yield_spatial_table(subpatch, divs, spp_col, x_col, y_col) @@ -514,7 +514,7 @@ def ear(patch, cols, splits, divisions): # Loop through all divisions within this split subresultx = [] subresulty = [] - subdivlist = divs.split(';') + subdivlist = _split_divs(divs) for divs in subdivlist: spatial_table = _yield_spatial_table(subpatch, divs, spp_col, x_col, y_col) @@ -538,6 +538,15 @@ def ear(patch, cols, splits, divisions): # Return all results return result_list + +def _split_divs(divs): + if type(divs) == type((1,1)): # Tuple (occurs when main evals single div) + subdivlist = [str(divs)[1:-1]] + else: # String + subdivlist = divs.split(';') + return subdivlist + + @log_start_end @doc_sub(metric_params, metric_return, cols_note, splits_note) def comm_grid(patch, cols, splits, divs, metric='Sorensen'): From 57ef7f52d20e8f12dff3e265df98936af2147aa0 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 8 Apr 2014 14:41:43 -0700 Subject: [PATCH 217/343] Fix bug in repeated updating of meta dict --- macroeco/empirical/empirical.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index 7b574e1..a4fe9ff 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -282,6 +282,10 @@ def _subset_meta(full_meta, subset): if not subset: return full_meta + meta = {} # Make deepcopy of entire meta (all section dicts in meta dict) + for key, val in full_meta.iteritems(): + meta[key] = copy.deepcopy(dict(val)) + conditions = subset.replace(' ','').split(';') # TODO: This works for numeric, modify to do nothing for categorical cols @@ -289,24 +293,24 @@ def _subset_meta(full_meta, subset): 
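Besides deep-copying the metadata sections so that repeated subsetting no longer mutates the original, this function splits each condition string into a column name, an operator, and a value. A small standalone sketch of that parsing step, with an example condition string:

import re

condition = 'year>=2005'

# Split on any of <, >, = to recover the column name and the value,
# then strip everything except <, >, = to recover the operator itself
parts = re.split('[<>=]', condition)
col, val = parts[0], parts[-1]
operator = re.sub('[^<>=]', '', condition)

print(col)       # year
print(operator)  # >=
print(val)       # 2005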
condition_list = re.split('[<>=]', condition) col = condition_list[0] val = condition_list[-1] - col_step = full_meta[col]['step'] + col_step = meta[col]['step'] operator = re.sub('[^<>=]', '', condition) if operator == '==': - full_meta[col]['min'] = val - full_meta[col]['max'] = val + meta[col]['min'] = val + meta[col]['max'] = val elif operator == '>=': - full_meta[col]['min'] = val + meta[col]['min'] = val elif operator == '>': - full_meta[col]['min'] = str(eval(val) + eval(col_step)) + meta[col]['min'] = str(eval(val) + eval(col_step)) elif operator == '<=': - full_meta[col]['max'] = val + meta[col]['max'] = val elif operator == '<': - full_meta[col]['max'] = str(eval(val) - eval(col_step)) + meta[col]['max'] = str(eval(val) - eval(col_step)) else: raise ValueError, "Subset %s not valid" % condition - return full_meta + return meta @log_start_end From 0d60006513c8ecc62993a08a719b2ec42a946220 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 8 Apr 2014 14:43:33 -0700 Subject: [PATCH 218/343] Fix typo in ear arguments divisions -> divs Divs used since division is reserved name and don't want to accidentally use division as a variable name. --- macroeco/empirical/empirical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index a4fe9ff..317e8a3 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -477,7 +477,7 @@ def sar(patch, cols, splits, divs): @log_start_end @doc_sub(metric_params, metric_return, cols_note, splits_note, division_note) -def ear(patch, cols, splits, divisions): +def ear(patch, cols, splits, divs): """ Calculates an empirical endemics area relationship From 999f4108acb7fae6a45ea24decd7ab0cbf2b2240 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Fri, 11 Apr 2014 14:24:34 -0700 Subject: [PATCH 219/343] Added data_read_write for unwritten datatypes --- macroeco/main/main.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 36c260b..05b36c0 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -116,15 +116,22 @@ def _do_format(options): misc.data_read_write(datapath, out_path, "dense", **options) - elif analysis_name == 'format_columnar': - misc.format_columnar() + elif analysis_name == 'format_stacked': + + misc.data_read_write(datapath, out_path, "stacked", **options) + elif analysis_name == 'format_grid': - misc.format_grid() + + misc.data_read_write(datapath, out_path, "grid", **options) + elif analysis_name == 'format_transect': - misc.format_transect() + + misc.data_read_write(datapath, out_path, "transect", **options) + else: raise NameError("Cannot format data using analysis %s" % analysis_name) + def _do_analysis(options): """ Do analysis for a single run, as specified by options. 
From 888cce689910c0c4e20b2240f5e1fd381d394c87 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Fri, 11 Apr 2014 14:26:02 -0700 Subject: [PATCH 220/343] Removed old format files --- macroeco/misc/form_func.py | 659 ------------------- macroeco/misc/old_format_data.py | 1014 ----------------------------- macroeco/misc/xest_form_func.py | 108 --- macroeco/misc/xest_format_data.py | 475 -------------- 4 files changed, 2256 deletions(-) delete mode 100644 macroeco/misc/form_func.py delete mode 100644 macroeco/misc/old_format_data.py delete mode 100644 macroeco/misc/xest_form_func.py delete mode 100644 macroeco/misc/xest_format_data.py diff --git a/macroeco/misc/form_func.py b/macroeco/misc/form_func.py deleted file mode 100644 index 16b91e9..0000000 --- a/macroeco/misc/form_func.py +++ /dev/null @@ -1,659 +0,0 @@ -#!/usr/bin/python -'''This module contains the functions for formatting data files''' - -import os -import numpy as np -import csv -import matplotlib.mlab as plt -import glob -import sys - -#Hacking this..Oh well -import format_data -loc = format_data.__file__ -gcwd = os.getcwd #get current directory -pd = os.path.dirname #get parent directory -chdir = os.chdir #change directories -jp = os.path.join #Join paths -sys.path.append(pd(pd(loc))) -from data import Metadata -import itertools -import logging - - -#Formatting functions -def get_metadata(asklist, folder_name, dataname): - ''' - This function takes in a list of tuples and returns the appropriate - metadata in a dictionary - - Parameters - ---------- - asklist : list - A list of tuples e.g. [('x', 'precision'), ('y', 'maximum')] - - folder_name : string - Name of the archival folder where data is located e.g. BCIS - - dataname : string - Name of the metadata e.g. BCIS_1984.xml (string) - - Returns - ------- - : dict - A dictionary containing requested metadata values - - ''' - cwd = gcwd() - chdir(jp(pd(pd(gcwd())), 'archival', folder_name)) - meta = Metadata(dataname, asklist) - chdir(cwd) - return meta.get_meta_dict(asklist) - -def get_files(filetype, num, direct, globber='_????'): - ''' - This function gets the filetype files from the data directory - /archival/direct and returns the names of the filetype files in the - directory. - - Parameters - ---------- - filetype : string - A string specifying the type of the file, i.e. 'csv' or 'txt' - - num : int - Expected number of files of type 'direct_????.filetype' - - direct : string - The directory within /data/archival/ where the files are. - Example 'BCIS' or 'COCO' - - globber : string - String of what pattern is to be globbed - - Returns - ------- - : list - A list of strings - - ''' - - assert direct.find('/') == -1, "%s should not contain a '/'" % (direct) - cwd = gcwd(); - filedir = jp(pd(pd(gcwd())), 'archival', direct) - chdir(filedir) - datafiles = glob.glob(direct + globber + '.' + filetype) - chdir(cwd) - if not(len(datafiles) == num): - raise Exception("Must be exactly {0} {1}_*.{2} file in /archival/{1}"\ - .format(num, direct, filetype)) - return datafiles - - -def open_data(filename, delim, names=None): - ''' - This functions takes in the filename and returns a rec array. - - Parameters - ---------- - filename : string - Name of the data file - - delim : string - File delimiter - - names : list - A list of columns names. See csv2rec? 
- - Returns - ------- - : recarray - A recarray containing the data from the specified file name - - ''' - - data = plt.csv2rec(filename, delimiter=delim, names=names) - return data - -def create_intcodes(speclist, unq_specs, unq_ints, dtype=float): - '''This function converts each value in unq_specs to the corresponding - value in unq_ints. Acts on speclist. - - Parameters - ---------- - - speclist : np.array - a 1D np.array which contains the occurrences of the species within the - plot - - unq_specs : np.array - a 1D np.array of the unique species codes within the plot - - unq_int : np.array - 1D np.array of unique integers referring to the unique species codes - found within the plot - - dtype : type - The type of the tot_int array. Default is float - - - Returns - ------- - : np.array - A 1D np.array of integers that is equivalent to speclist - - ''' - assert len(speclist) > 0, "Species array cannot be empty" - assert len(unq_specs) == len(unq_ints), "unq_specs and unq_ints must be " \ - + "the same length" - speclist = speclist.astype(unq_specs.dtype) - tot_int = np.empty(len(speclist), dtype=dtype) - for s in xrange(len(unq_specs)): - check = (unq_specs[s] == speclist) - for i in xrange(len(check)): - if check[i]: - tot_int[i] = unq_ints[s] - return tot_int - -def output_form(data, filename): - '''This function writes data as a .csv into the current working directory - - Parameters - ---------- - data : structured array - An structured array containing the data to be output - - filename : string - A string representing the name of the file to be output. - - ''' - savedir = jp(gcwd(), filename.split('.')[0] + '.csv') - fout = csv.writer(open(savedir, 'w'), delimiter=',') - fout.writerow(data.dtype.names) - for i in xrange(len(data)): - fout.writerow(data[i]) - -def open_dense_data(filenames, direct, delim=','): - ''' - This function takes in a list of dense data file names, opens - them and returns them as list of rec arrays. - - Parameters - ---------- - - filenames : list - A list of filenames - - direct : string - The directory within data/archival/ where the files are. - Example 'ANBO_2010' or 'LBRI' - - delim : string - The default file delimiter is ',' - - Returns - ------- - : list - A list of rec arrays - - ''' - assert direct.find('/') == -1, "%s should not contain a '/'" % (direct) - filedir = jp(pd(pd(gcwd())), 'archival', direct) - datayears = [] - for name in filenames: - data = plt.csv2rec(jp(filedir, name), delimiter=delim) - datayears.append(data) - return datayears - -def format_dense(datayears, spp_col, num_spp, count_col='count'): - ''' - This function takes a list of data. This functions interates - through the list and formats each year of data and stores the - formatted data into a list containing all years of formatted data. - - Parameters - ---------- - datayears : list - A list of rec arrays containing all years of data - - - spp_col : int - The column in the dense array where the spp_names begin. 0 is the first - column. - - num_spp : tuple or int - Total number of species in plot. Each element in the tuple is the - number of species in the corresponding rec array in data year. - Therefore, len(num_spp) should equal len(datayears). If num_spp is an - int, it is converted to a tuple and extended to len(datayears) - - count_col : str - This string specifies the name of the count column. The default is - 'count'. - - Returns - ------- - : list - A list of formatted structured arrays. 
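The legacy create_intcodes helper shown above maps species codes onto numeric codes by looping over the unique species. For context, numpy expresses the same mapping idea directly with np.unique; this is a sketch with example codes, not the removed function:

import numpy as np

speclist = np.array(['artdra', 'gertyr', 'artdra', 'plole', 'gertyr'])

# return_inverse gives, for every occurrence, the index of its species
# in the sorted array of unique codes
unq_specs, int_codes = np.unique(speclist, return_inverse=True)

print(unq_specs)  # ['artdra' 'gertyr' 'plole']
print(int_codes)  # [0 1 0 2 1]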
- - ''' - # Handle and broadcast num_spp - if type(num_spp) == int: - num_spp = (num_spp,) - else: - num_spp = tuple(num_spp) - - if (len(num_spp) != len(datayears)): - if len(num_spp) == 1: - num_spp = tuple(np.repeat(num_spp[0], len(datayears))) - else: - raise TypeError('len(num_spp) must equal len(datayears)') - - - - data_formatted = [] - for k, data in enumerate(datayears): - ls = len(data.dtype.names[spp_col:spp_col + num_spp[k]]) - if len(data.dtype.names[:spp_col + num_spp[k]]) == \ - len(data.dtype.names): - dtype = data.dtype.descr[:spp_col] + [('spp', 'S22'), (count_col,\ - np.float)] - else: - dtype = data.dtype.descr[:spp_col] + data.dtype.descr[spp_col + \ - num_spp[k]:] + [('spp', 'S22'), (count_col, np.float)] - - data_out = np.empty(ls * len(data), dtype=dtype) - - for s, name in enumerate(data_out.dtype.names[:-2]): - cnt = 0 - for i in xrange(len(data)): - if s == 0: - data_out[name][cnt:(ls*(i+1))] = data[name][i] - data_out['spp'][cnt:(ls*(i+1))] = np.array\ - (data.dtype.names[spp_col:\ - spp_col + num_spp[k]]) - data_out[count_col][cnt:(ls*(i+1))] =\ - np.array(list(data[i]))[spp_col:spp_col +\ - num_spp[k]] - cnt = cnt + ls - else: - data_out[name][cnt:(ls*(i+1))] = data[name][i] - cnt = cnt + ls - #Remove all zeros, they are not needed - data_out = data_out[data_out[count_col] != 0] - data_formatted.append(data_out) - return data_formatted - -def open_nan_data(filenames, missing_value, site, delim, col_labels): - ''' - This function takes in the filenames with nans data file, removes any - NaN values for the x and y coordinates and returns a rec array. - - Parameters - ---------- - - filename : list - A list of filenames which point to data with missing values - - missing_value : string - How a missing value is labeled in the data - - site : string - Site name. Ex. 'COCO' or 'BCIS' - - delim : string - Delimiter for the files - - xylabels : tuple - Tuple with x and y column labels, i.e. ('gx', 'gy') or ('x', 'y') - - Returns - ------- - : list - list of recarrays - - ''' - #NOTE: Might need to get rid of some more NA fields - datadir = jp(pd(pd(gcwd())), 'archival', site) - datayears = [] - for name in filenames: - data = plt.csv2rec(jp(datadir, name), delimiter=delim,\ - missing=missing_value) - for label in col_labels: - notNaN = (False == np.isnan(data[label])) - data = data[notNaN] - datayears.append(data) - - return datayears - -def fractionate(datayears, wid_len_new, step_new, col_names, - wid_len_old=None, min_old=None, step_old=None): - ''' - This function takes in a list of formatted data years and converts the grid - numbers into meter measurements. For example, LBRI is a 16x16 grid and each - cell is labeled with integers. However, the length (and width) of a cell - is 0.5m. This function converts each integer cell number to the appropriate - integer (i.e. for LBRI cell (2,2) (counting from 1) becomes cell (0.5, - 0.5)). - - Parameters - ---------- - datayears : list - A list of formatted structured arrays - - wid_len_new : tuple - A tuple containing the new width (x) in meters and length (y) - in meters of the entire plot. - - step_new : tuple - The new step (or stride length) of the cell width and length - (tuple: (x_step, y_step)). It should be given in terms of meters. Also, - called precision. - - col_names : list - The col_names of the structured array that are to be fractionated. 
- - wid_len_old : tuple or None - If None, it assumes that a np.unique on datayears[col_name[i]] gives a - array that is the same length as np.arange(0, wid_len_new[i], - step=step_new[i]). If it doesn't, an error will be thrown. If not - None, expects the old maximum length for the given columns. - - min_old : tuple or None - Same as wid_len_old but the old minimum value for each given column - - step_old : tuple or None - Same as wid_len_old but the old step (or stride length/spacing) for - each given column. - - Returns - ------- - : list - A list of converted structured arrays - - Notes - ----- - This function should be used on columnar data - - ''' - - # format column names - col_names = format_headers(col_names) - - frct_array = [] - for data in datayears: - for i, name in enumerate(col_names): - if wid_len_old != None and step_old != None and min_old != None: - nums = np.arange(min_old[i], wid_len_old[i] + step_old[i], - step=step_old[i]) - else: - nums = np.unique(data[name]) - frac = np.arange(0, wid_len_new[i], step=step_new[i]) - #Have to make sure I have the data right type - ind = list(data.dtype.names).index(name) - dt = data.dtype.descr - dt[ind] = (name, 'f8') - data = data.astype(dt) - data[name] = create_intcodes(data[name], nums, frac) - frct_array.append(data) - - return frct_array - -def add_data_fields(data_list, fields_values, descr='S20'): - ''' - Add fields to data based on given names and values - - Parameters - ---------- - data_list : list - List of data to which a field will be appended - - fields_values : dict - dictionary with keyword being the the field name to be added and the - value being a tuple with length data_list specifying the - values to be added to each field in each data set. - - descr : a single data type or a dictionary - A single value will be broadcast to appropriate length. The dictionary - must have the same keywords as fields_values and must be the same - length. Each keyword should lookup a dtype. - - Returns - ------- - : list - A list containing the structured arrays with the new fields appended - - Notes - ----- - All added fields have default dtypes of 'S20' - - ''' - - # Check that dype descriptors are formatted appropriately - if type(fields_values) != dict: - raise TypeError('fields_values must be a dict not %s of type %s' % - (str(fields_values), str(type(fields_values)))) - keys = fields_values.viewkeys() - if type(descr) == dict: - if set(list(descr.viewkeys())) != set(list(keys)): - raise ValueError("descr and fields_values must contain same keys") - elif type(descr) == type or type(descr) == str: - descr = broadcast(len(fields_values), descr) - descr = dict(itertools.izip(keys, descr)) - else: - raise ValueError("Invalid type for descr") - - alt_data = [] - - dlen = len(data_list) - for i, data in enumerate(data_list): - for name in list(fields_values.viewkeys()): - data = add_field(data, [(name, descr[name])]) - - try: - ind = len(fields_values[name]) != dlen - if ind: #broadcast - fields_values[name] = broadcast(dlen, fields_values[name]) - except TypeError: - # Broadcast fields_values. Error is thrown if can't broadcast - fields_values[name] = broadcast(dlen, fields_values[name]) - - data[name] = fields_values[name][i] - alt_data.append(data) - return alt_data - -def merge_formatted(data_form): - ''' - Take in a list of formatted data an merge all data in - the list. 
The dtypes of the data in the list must - be the same - - Parameters - ---------- - data_form : list - List of formatted structured arrays (or rec_arrays) - - Returns - ------- - : list - A list containing one merged structured array - - ''' - if len(data_form) == 1: - return np.array(data_form[0]) - else: - # Dtypes can be a bit of a pain here - merged = np.copy(np.array(data_form[0])) - for i in xrange(1, len(data_form)): - if merged.dtype != data_form[i].dtype: - if merged.dtype.names != data_form[i].dtype.names: - raise TypeError("Column names of data do not match") - else: # If data dtypes are just different strings they should - # still be able to merge - temp_arr = list(np.copy(merged)) + list(np.copy(data_form[i])) - merge_types = [ty[1] for ty in merged.dtype.descr] - dt_types = [ty[1] for ty in data_form[i].dtype.descr] - con_types = [] - for m,d in zip(merge_types, dt_types): - if m == d: - con_types.append(m) - elif type(m) == str and type(d) == str: - if m[:2] == d[:2]: - if m > d: - con_types.append(m) - else: - con_types.append(d) - # Have to adjust the types appropriately - if len(con_types) == len(merged.dtype.names): - dtype = zip(merged.dtype.names, con_types) - merged = np.empty(len(temp_arr), dtype=dtype) - flipped_temp = zip(*temp_arr) - for i, nm in enumerate(merged.dtype.names): - merged[nm] =\ - np.array(flipped_temp[i]).astype(dtype[i][1]) - else: - raise TypeError('dtypes of data do not match. Merge' \ - + ' failed') - else: - merged = np.concatenate((merged, np.array(data_form[i]))) - return merged - -def add_field(a, descr): - ''' - Add field to structured array and return new array with empty field - - Parameters - ---------- - a : structured array - Orginial structured array - descr : list - dtype of new field i.e. [('name', 'type')] - - Returns - ------- - : structured array - Structured array with field added - - ''' - - if a.dtype.fields is None: - raise ValueError, "'A' must be a structured numpy array" - b = np.empty(a.shape, dtype=descr + a.dtype.descr) - for name in a.dtype.names: - b[name] = a[name] - return b - -def broadcast(length, item): - ''' - Broadcasts item to length = length if possible. Else raises error. - - length -- int - - item -- int of iterable - - ''' - # Handle and broadcast item - if type(item) == int: - item = (item,) - elif type(item) == type: - item = (item,) - elif type(item) == str: - item = (item,) - else: - item = tuple(item) - - if (len(item) != length): - if len(item) == 1: - item = tuple(np.repeat(item[0], length)) - else: - raise ValueError('Could not broadcast %s to length $s' % - (str(item), str(length))) - return item - -def format_headers(headers): - ''' Uses same formatting code that csv2rec uses. Converts the passed in - headers to the same format the csv2rec uses. 
- - Parameters - ---------- - headers : list - list of strings to be converted - - Return - ------ - : list - converted strings - - Notes - ----- - See csv2rec documentation and code - ''' - - # convert header to list of strings - if type(headers) == str or type(headers) == int or type(headers) == float: - headers = [headers] - headers = [str(i) for i in headers] - - - itemd = { - 'return' : 'return_', - 'file' : 'file_', - 'print' : 'print_', - } - - # remove these chars - delete = set("""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""") - delete.add('"') - - names = [] - seen = dict() - for i, item in enumerate(headers): - item = item.strip().lower().replace(' ', '_') - item = ''.join([c for c in item if c not in delete]) - if not len(item): - item = 'column%d'%i - - item = itemd.get(item, item) - cnt = seen.get(item, 0) - if cnt>0: - names.append(item + '_%d'%cnt) - else: - names.append(item) - seen[item] = cnt+1 - - - return names - -def format_dict_names(old_dict): - ''' - This function formats the names with the format_headers function and - returns a new dictionary with the formatted names. Both dictionaries - contain the same values - - Parameters - ---------- - old_dict : dict - Dictioary with old keywords that will be changed - - Returns - ------- - new_dict : dict - Dictionary with updated keywords - - ''' - new_dict = {} - oldkeys = sorted(old_dict) - newkeys = format_headers(oldkeys) - for i in xrange(len(oldkeys)): - new_dict[newkeys[i]] = old_dict[oldkeys[i]] - - return new_dict - - - - - - - - - diff --git a/macroeco/misc/old_format_data.py b/macroeco/misc/old_format_data.py deleted file mode 100644 index de9b9e6..0000000 --- a/macroeco/misc/old_format_data.py +++ /dev/null @@ -1,1014 +0,0 @@ -#!/usr/bin/python - -'''This module contains 4 separate classes, each built to handle a -canonical data type - -This module provides the user with some formatting functions but does provide -the user with all formatting functions that may be required. This module is -not a substitute for thorough examination of ones data to remove irrelevant -data''' - -import numpy as np -from matplotlib.mlab import csv2rec -import form_func as ff -from numpy.lib.recfunctions import drop_fields -import csv - - -class Columnar_Data: - ''' - This is the data form that the macroeco software package wants the data - file in. All other canonical data sets are converted to columnar data and - then turned into Columnar_Data objects. - - Examples of columnar data include BCIS, LUQU, and COCO - - Multiple data files must have same format if they are to be merged - - ''' - - def __init__(self, datalist, delimiter=',', missingd=None,\ - delete_missing=False, archival=True): - ''' - This __init__ method takes in data and stores it in rec_arrays. - If specified, it will located missing data points and remove them - from the data set. - - Parameters - ---------- - datalist : string, list of strings, or list of ndarrays. - Data filenames or list of data arrays - - delimiter : string - The file delimiter. Default is ',' - - missingd : dict - Dictionary mapping munged column names to field values which - signify that the field does not contain actual data and should be - masked, e.g. '0000-00-00' or 'unused'. The missing value must be - represented as a string. - - delete_missing : bool - If True, deletes all of the missing values. If False, only deletes - the NaNs from the data. - - archival : bool - If True, a copy of self.columnar_data is made and stored in - self.columnar_archival. If dataset is very large, set to False. 
- - Note - ---- - If column type is integer, missing values are set to -1. If column - type is float, missing values are set to NaN. If column type is - string, missing values are set to ''. If column type is object, - missing values are set to None. - - ''' - if type(datalist) == str: - datalist = [datalist] - - if np.all(np.array([type(x) == str for x in datalist])): - self.columnar_data = [] - self.data_names = [] - for file_name in datalist: - self.columnar_data.append(csv2rec(file_name, delimiter=delimiter,\ - missingd=missingd)) - self.data_names.append(file_name) - if missingd != None: - if delete_missing: - trun_data = [] - for data in self.columnar_data: - for key in missingd.iterkeys(): - try: - # Missing float - notNaN = (False == np.isnan(data[key])) - except: - notNaN = np.ones(len(data[key]), dtype=bool) - notBlank = np.array([it != '' for it in data[key]]) - notMinusOne = (data[key] != -1)# Missing int - # Missing other - notNone = np.array([i != None for i in data[key]]) - ind = np.bitwise_and(notNaN, notBlank) - ind = np.bitwise_and(ind, notMinusOne) - ind = np.bitwise_and(ind, notNone) - data = data[ind] - trun_data.append(data) - self.columnar_data = trun_data - else: - trun_data = [] - for data in self.columnar_data: - for key in missingd.iterkeys(): - try: - notNaN = (False == np.isnan(data[key])) - except: - notNaN = np.ones(len(data[key]), dtype=bool) - data = data[notNaN] - trun_data.append(data) - self.columnar_data = trun_data - elif np.all(np.array([type(x) == np.ndarray for x in datalist])): - self.columnar_data = datalist - - if archival: - self.columnar_archival = [np.copy(data) for data in - self.columnar_data] - else: - self.columnar_archival = [] - - def reset_columnar_data(self): - ''' - Resets self.columnar_data to self.columnar_archival - - Need to be careful about excessive memory usage! - ''' - if len(self.columnar_archival) == 0: - raise ValueError("The self.columnar_archival attribute of this %s" - % (self.__class__.__name__) + " object has not" - + " been initialized") - else: - self.columnar_data = [np.copy(data) for data in - self.columnar_archival] - - def subset_data(self, subset={}): - ''' - Subset any given column of the data - - Parameters - ---------- - subset : dict - Dictionary of permanent subset to data, {'column_name': - 'condition'}, which will limit all analysis to records in which - column_name meets the condition, ie, {'year': ('==', 2005), 'x': - [('>', 20), ('<', 40)]} restricts analysis to year 2005 and x - values between 20 and 40. These conditions can also be passed to - the individual methods, but subsetting the data table up front may - save analysis time. 
Subsetting on a string would look something - like {'name' : [('==', 'John'), ('==', 'Harry')]} - ''' - - - if subset != {}: - # Format column names - subset = ff.format_dict_names(subset) - - sub_data = [] - for data in self.columnar_data: - valid = np.ones(len(data), dtype=bool) - - for key, value in subset.iteritems(): - if type(value) is not type(['a']): # Make all iterables - value = [value] - - # Merge tuples into a string - merged_values = [] - for val in value: - try: # check if val[1] is a string - eval(str(val[1])) - merged_values.append(val[0] + str(val[1])) - except: - merged_values.append(val[0] + "'" + val[1] + "'") - - for this_value in merged_values: - try: - this_valid = eval("data[key]" + this_value) - valid = np.logical_and(valid, this_valid) - except ValueError: #If key can't be found do nothing - pass - - sub_data.append(data[valid]) - - self.columnar_data = sub_data - - def split_up_data_by_field(self, split_columns=None): - ''' - This function will take in the split-columns list and and split the - data into separate arrays based on the list. For example, if one were - to pass in dbh1, dbh2, dbh3 three copies of the data would be - made, each being identical except that each would only contain one of - the instances of dbh. One could also pass [(dbh1, recr1), (dbh2, recr2), - (dbh3, recr3)]. All other fields in split_columns will be excluded - other than the fields within the tuple under consideration. - - Parameters - ---------- - split_columns : list - a list of tuples specifying the columns by which to split the array - - Notes - ----- - Saves the split array as self.columnar_data. - - ''' - #Note: If they enter the wrong column name nothing will be removed - #Should I error check for this? - if split_columns != None: - # Check if split_columns is a list of strings. If so, change it - # into a list of tuples - split_columns = [(s,) if type(s) == str else tuple(s) for s in - split_columns] - - # Format the names in each tuple - split_columns = [tuple(ff.format_headers(nms)) for nms in - split_columns] - - split_data = [] - given_col_names = [] - for tup in split_columns: - for name in tup: - given_col_names.append(name) - given_col_names = np.array(given_col_names) - - - for data in self.columnar_data: - for tup in split_columns: - ind = np.ones(len(given_col_names), dtype=bool) - for name in tup: - ind = np.bitwise_and((name != given_col_names), ind) - remove_names = given_col_names[ind] - split_data.append(drop_fields(data, list(remove_names))) - self.columnar_data = split_data - - def change_column_names(self, change=None, changed_to=None): - ''' - This function takes a list of column names to be changed and a name - that they should be changed to - - Parameters - ---------- - change : list of tuples or strings - Each tuple or string contains column names. All the column names in - the first tuple will be changed to the first element in the - changed_to list and so on. - changed_to : list - A list of strings that contain the names that the columns in change - will be changed to. - - Notes - ----- - This function is useful if you would like to merge self.columnar_data - but the dtype.names are different. 
- - ''' - if change != None and changed_to != None: - if len(change) != len(changed_to): - raise ValueError('Length of params change and changed_to must' - + ' be equal') - # Convert to tuples if just received strings - change = [(x,) if type(x) == str else tuple(x) for x in change] - - # Format the names in each tuple - change = [tuple(ff.format_headers(nms)) for nms in change] - - for data in self.columnar_data: - column_names = np.array(data.dtype.names) - for i, name_tup in enumerate(change): - for name in name_tup: - find = np.where((name == column_names))[0] - if len(find) != 0: - max_len = np.max([len(x) for x in column_names]) - if max_len < len(changed_to[i]): - column_names = column_names.astype('S' + - str(len(changed_to[i]))) - column_names[find[0]] = changed_to[i] - data.dtype.names = tuple(column_names) - - def add_fields_to_data_list(self, fields_values=None, descr='S20'): - ''' - This functions adds given fields and values to the data list. If the - length of the value for a given keyword in one, it will be broadcast to - the length of self.columnar_data. Else an error will be thrown. - - Parameters - ---------- - fields_values : dict - dictionary with keyword being the the field name to be added and - the value being a tuple with length self.columnar_data specifying - the values to be added to each field in each data set. - descr : a single data type or a dictionary - A single value will be broadcast to appropriate length. The - dictionary must have the same keywords as fields_values and must be - the same length. Each keyword should lookup a dtype. - ''' - if fields_values != None: - self.columnar_data = ff.add_data_fields(self.columnar_data, - fields_values, descr=descr) - - def remove_columns(self, col_names=None): - ''' - This function will remove the all the columns within with names in - col_names from all the datasets in self.columnar_data. - - Parameters - ---------- - col_names : string or list - The name or names of columns to be removed - - ''' - - if col_names != None: - - if type(col_names) == str: - col_names = [col_names] - else: - col_names = list(col_names) - - # Format column names - col_names = ff.format_headers(col_names) - - removed_data = [] - for data in self.columnar_data: - removed_data.append(drop_fields(data, col_names)) - self.columnar_data = removed_data - - def fractionate_data(self, wid_len=None, step=None, col_names=None, - wid_len_old=None, min_old=None, step_old=None): - ''' - This function converts grid numbers to length measurements in - self.columnar_data - - Parameters - ---------- - wid_len : tuple - A tuple containing the the absolute length of the columns being - converted - step : tuple - The desierd precision (step or stride length) of each grid. The - first element in the step tuple corresponds with the first element - in the wid_len tuple and so on. - col_names : array-like object - An array-like object of strings giving the names of the columns - that will be fractionated - wid_len_old : tuple or None - If None, it assumes that a np.unique on datayears[col_name[i]] - gives a array that is the same length as np.arange(0, - wid_len_new[i], step=step_new[i]). If it doesn't, an error will be - thrown. If not None, expects the old maximum length for the given - columns. - min_old : tuple or None - Same as wid_len_old but the old minimum value for each given column - step_old : tuple or None - Same as wid_len_old but the old step (or stride length/spacing) for - each given column. 
- - ''' - if wid_len != None and step != None and col_names != None: - self.columnar_data = ff.fractionate(self.columnar_data, wid_len, step, - col_names, wid_len_old=wid_len_old, - min_old=min_old, step_old=step_old) - - - def merge_data(self): - ''' - This function concatenates the data files in data_list. The dtypes of - the data in data_list must be identical or this function will fail. - ''' - - self.merged_data = ff.merge_formatted(self.columnar_data) - - def output_merged_data(self, filename): - ''' - This function merges self.columnar_data and outputs the merged data. - - Parameters - ---------- - filename : string - The filename to be output - - ''' - #Merge data in case it has not been done - self.merge_data() - ff.output_form(self.merged_data, filename) - - def output_columnar_data(self, filenames): - ''' - This function outputs the self.columnar_data - - Parameters - ---------- - filenames : list - A list of filenames - - ''' - assert len(filenames) == len(self.columnar_data), "Number of " + \ - "filenames must be the same as the number of datasets" - for i, name in enumerate(filenames): - ff.output_form(self.columnar_data[i], name) - -class Grid_Data: - '''This class handles data should look like the EarthFlow data after a - census. It is a grid with species abundance data in each cell. - ex. - ARTDRA - 6 - GERTYR - 8 - - ''' - - def __init__(self, filenames, archival=True, spp_sep='\n'): - ''' - Pass in the file name(s) of the grid data that you want converted and - the number of columns in each grid. - - Parameters - ---------- - - filenames : str or list of strings - A filename or list of filenames - - archival : bool - If True, a copy of self.grid_data is made and stored in - self.grid_archival. If dataset is very large, set to False. - - ''' - #NOTE: Handle missing data!!!! - - if type(filenames) == str: - filenames = [filenames] - - assert np.all(np.array([name.split('.')[-1] for name in filenames]) ==\ - 'csv'), "Files must be csv" - - self.grid_data = [] - self.cols = [] - self.rows =[] - - for i, name in enumerate(filenames): - # Sometimes csv.reader reads an extra column so you have to read to - # whole file. Seems stupid to read in the file twice but oh well... - with open(name, 'rb') as csvreader: - reader = csv.reader(csvreader) - rows = [row for row in reader] - min_len = np.min([len(row) for row in rows]) - self.cols.append(min_len) - - self.grid_data.append(csv2rec(name, names=list(np.arange(0,\ - self.cols[i]).astype('S10')))) - self.rows.append(len(self.grid_data[i])) - - #Remove all '\n' from the end of each cell in grid - #Not technically necessary but just being clean - self.grid_data = remove_char(self.grid_data, char=spp_sep) - self.grid_data = remove_white_spaces(self.grid_data) - - if archival == True: - self.grid_archival = [np.copy(data) for data in self.grid_data] - else: - self.grid_archival = [] - - def reset_grid_data(self): - ''' - Resets self.grid_data to self.archival_data - - Need to be careful about excessive memory usage! - ''' - - if len(self.grid_archival) == 0: - raise ValueError("The self.grid_archival attribute of this %s" - % (self.__class__.__name__) + " object has not" - + " been initialized") - else: - self.grid_data = [np.copy(data) for data in self.grid_archival] - - def truncate_grid_cells(self, symbol=None): - ''' - This function will look at each cell in grid list and truncated the - string within the cell at AND after the first instance of a given - symbol. 
- - Parameters - ---------- - symbol : string or list of strings - The symbol at which to being truncation - - Notes - ----- - symbol is a keyword argument because format_grid_data script gives the - option to run every method. - - ''' - if symbol != None: - - if type(symbol) == str: - symbol = [symbol] - else: - symbol = list(symbol) - - for i in xrange(len(self.grid_data)): - for nm in self.grid_data[i].dtype.names: - for j in xrange(len(self.grid_data[i][nm])): - for sym in symbol: - ind = self.grid_data[i][nm][j].find(sym) - if ind != -1: - self.grid_data[i][nm][j] = \ - self.grid_data[i][nm][j][:ind] - - self.grid_data = remove_char(self.grid_data) - - # List of remove replace tuples? - def remove_and_replace(self, remove=None, replace=''): - ''' - Removes a string from a grid cell and replaces it with another one - - Paramters - --------- - remove : string - String to be removed - replace : string - String to replace removed string - - ''' - - if remove != None and replace != None: - for i in xrange(len(self.grid_data)): - for nm in self.grid_data[i].dtype.names: - for j in xrange(len(self.grid_data[i][nm])): - self.grid_data[i][nm][j] =\ - self.grid_data[i][nm][j].replace(remove, replace) - - def find_unique_spp_in_grid(self, spacer='-', spp_sep='\n'): - ''' - This function finds all of the unique species in the grid. - It assumes that your grid data is in the proper format. - - Parameters - ---------- - spacer : str - The character separating the species code from the species count. - Default value is '-' (n-dash) - - spp_sep : str - The character that separates a speces/count combination from - another species/count combination. Default value is '\n' - - ''' - self.unq_spp_lists = [] - for num, data in enumerate(self.grid_data): - spp_names = [] - for col in data.dtype.names: - for row in xrange(self.rows[num]): - if data[col][row].find(spacer) != -1: - nam_lst = data[col][row].split(spacer) - if len(nam_lst) == 2: - spp_names.append(nam_lst[0].strip()) - else: - spp_names.append(nam_lst[0].strip()) - for i in xrange(1, len(nam_lst) - 1): - spp_names.append(nam_lst[i].split(spp_sep)[1].\ - strip()) - self.unq_spp_lists.append(np.unique(np.array(spp_names))) - - def grid_to_dense(self, spacer='-', spp_sep='\n', archival=True): - ''' - This function converts a the list of gridded data sets into dense - data sets and stores them in dense_data. In addition, it - makes a Dense_Data object out of the newly converted data. - - Parameters - ---------- - spacer : str - The character separating the species code from the species count. - Default value is '-' (n-slash) - - spp_sep : str - The character that separates a speces/count combination from - another species/count combination. Default value is '\n' - - - ''' - - self.find_unique_spp_in_grid(spacer=spacer, spp_sep=spp_sep) - dense_data = [] - for i, data in enumerate(self.grid_data): - dtype_list = [('cell', np.int), ('row', np.int), ('column', np.int)] - for name in self.unq_spp_lists[i]: - tuple_type = (name, np.float) - dtype_list.append(tuple_type) - matrix = np.empty(self.rows[i] * self.cols[i], dtype=dtype_list) - #Iterate through the plot - count = 0 - for col in data.dtype.names: - for row in xrange(self.rows[i]): - matrix['cell'][count] = count - matrix['row'][count] = row - matrix['column'][count] = int(col) - for spp_name in self.unq_spp_lists[i]: - - # Check if cell has species. May be nested occurence! 
- matrix[spp_name][count] = 0 # Set base to 0 - start = data[col][row].find(spp_name) - if start == -1: # Nothing is there - pass # Count already set to zero - - else: # Something is there, but is it nested? - found = start - while found != -1: - # If this is true, it is nested - if (data[col][row][start + len(spp_name)] !=\ - spacer) or not(start == 0 or \ - data[col][row][start - 1] == spp_sep): - - pass - - else: # Actually a species, so add some - # abundance - - raw = data[col][row][start:].split(spacer)[1] - if raw.find(spp_sep) != -1: - tot_spp = raw.split(spp_sep)[0].strip() - else: - tot_spp = raw.split()[0].strip() - matrix[spp_name][count] += float(tot_spp) - found = data[col][row][start + 1 - :].find(spp_name) - start += found + 1 - count += 1 - dense_data.append(matrix) - self.Dense_Object = Dense_Data(dense_data, archival=archival) - - - def output_grid_data(self, filenames): - ''' - This function prints the data within self.grid_data with the given - filenames. - - Parameters - ----------- - filenames : list - A list of filnames to which the data will be saved - - ''' - - assert len(filenames) == len(self.grid_data), "Number of filenames\ - must be the same as the number of datasets" - for i, data in enumerate(self.grid_data): - ff.output_form(data, filenames[i]) - - -class Dense_Data: - '''This class handles data that are in the dense format. An example of the - dense format is a csv file that has columns named 'row' and 'column' and - the remainder of columns named after each species in the plot. The values - within each species column are the counts within the cell specified by the - columns names 'row' and 'column'. - - Note: Need to consider how I might break this class - ''' - - def __init__(self, datalist, delim=',', replace=None, archival=True): - ''' - - Parameters - ----------- - datalist : string, list of strings or list of arrays - List of filenames to be loaded or list of arrays to be set to - self.dense_data - delim : string - The file delimiter - replace : tuple - A tuple of length 2. The first element is a string that - represents the missing values that you would like to replace. The - second element is the value with which you would like to replace - the missing values. - archival : bool - If True, a copy of self.dense_data is made and stored in - self.dense_archival. If dataset is very large, set to False. - - ''' - #TODO: What kind of files could break this - if type(datalist) == str: - datalist = [datalist] - - if np.all(np.array([type(x) == str for x in datalist])): - self.dense_data = [] - if replace != None: - - assert len(replace) == 2, "Replace must contain 2 elements" - - for name in datalist: - self.dense_data.append(replace_vals(name, replace, - delim=delim)) - else: - for name in datalist: - data = csv2rec(name, delimiter=delim) - self.dense_data.append(data) - - elif np.all(np.array([type(x) == np.ndarray for x in datalist])): - self.dense_data = datalist - - if archival: - self.dense_archival = [np.copy(data) for data in - self.dense_data] - else: - self.dense_archival = [] - - def reset_grid_data(self): - ''' - Resets self.grid_data to self.archival_data - - Need to be careful about excessive memory usage! 
- ''' - - if len(self.dense_archival) == 0: - raise ValueError("The self.dense_archival attribute of this %s" - % (self.__class__.__name__) + " object has not" - + " been initialized") - else: - self.dense_data = [np.copy(data) for data in self.dense_archival] - - - def dense_to_columnar(self, spp_col_num, num_spp, count_col='count',\ - archival=True): - ''' - This function uses a function in form_func to convert dense data into - columnar data. Stores the columnar data as a Columnar Object. - - Parameters - ---------- - spp_col_num : int - The column number in the dense array where the spp_names begin - - num_spp : tuple or int - Number of species in each dataset in self.dense_data. If it is an - int, it will be broadcasted to the length of self.dense_data - - count_col : str - This string specifies the name of the count column. The default is - 'count'. - - ''' - columnar_data = ff.format_dense(self.dense_data, spp_col_num,\ - num_spp, count_col=count_col) - self.Columnar_Object = Columnar_Data(columnar_data, archival=archival) - - def output_dense_data(self, filenames): - ''' - This function prints the data within self.dense_data with the given - filenames. If self.dense_data has not been filled, error is thrown. - - Parameters - ---------- - filenames : list - A list of filenames to which the data will be saved - - ''' - - assert len(filenames) == len(self.dense_data), "Number of filenames\ - must be the same as the number of datasets" - for i, data in enumerate(self.dense_data): - ff.output_form(data, filenames[i]) - -class Transect_Data: - ''' - This class handles data that are similar to the Breeding Bird survey data. - One column has the species ID, one column has stop and all the other - columns have transects. This class can handle data with "n" nestings, not - just two. For example, the data could have location, transect and stop. - - The "stop" data should all be in consecutive columns - - ''' - - def __init__(self, filenames, delim=',', replace=None, archival=True): - ''' - - Parameters - ---------- - filenames : list - A list of filenames - delim : string - The file delimiter - replace : tuple - A tuple of length 2. The first element is a string which - represents the missing values that you would like to replace. The - second element is the value with which you would like to replace - the missing values. - archival : bool - If True, a copy of self.transect_data is made and stored in - self.transect_archival. If dataset is very large, set to False. - - - ''' - self.transect_data = [] - if type(filenames) == str: - filenames = [filenames] - - if replace != None: - - assert len(replace) == 2, "Replace must contain 2 elements" - replace = (str(replace[0]), replace[1]) - - for name in filenames: - self.transect_data.append(replace_vals(name, replace, - delim=delim)) - else: - for name in filenames: - data = csv2rec(name, delimiter=delim) - self.transect_data.append(data) - - if archival: - self.transect_archival = [np.copy(data) for data in - self.transect_data] - else: - self.transect_archival = [] - - def reset_transect_data(self): - ''' - Resets self.transect_data to self.transect_archival - - Need to be careful about excessive memory usage! 
- ''' - if len(self.transect_archival) == 0: - raise ValueError("The self.transect_archival attribute of this %s" - % (self.__class__.__name__) + " object has not" - + " been initialized") - else: - self.transect_data = [np.copy(data) for data in - self.transect_archival] - - def transect_to_columnar(self, stop_col_num, tot_stops, stop_name='stop', - count_name='count', archival=True): - ''' - This function takes transect data and convertes it into columnar data. - In addition it saves the columnar data as a Columnar_Data object. - - - Parameters - ---------- - stop_col_num : iterable or int - The column number where the stop counts begin (0 is the first - column). Can be len(transect_data) or length == 1. Broadcast if - length equals 1. - - tot_stops : iterable or int - The number of columns with stops. Can be len(transect_data) or - length == 1. Broadcast if length equals 1. - - stop_name : str - The name of the new stop column in the formatted data - - count_name : str - The name of the count column. Default is "count" - - - Notes - ----- - This function assumes that all data in self.transect_data are formatted - the same way. For example, the column that contains species names or - codes has the same name throughout all data sets. - - ''' - # Broadcast stop_col_num - stop_col_num = ff.broadcast(len(self.transect_data), stop_col_num) - tot_stops = ff.broadcast(len(self.transect_data), tot_stops) - - columnar_data = [] - for j, data in enumerate(self.transect_data): - nstops = tot_stops[j] - dtypes = data.dtype.descr[ : stop_col_num[j] ] - if (len(dtypes) + nstops) != len(data.dtype.names): - #Accounting for data fields after stops - end_dtypes = data.dtype.descr[(len(dtypes) + nstops) : ] - for x in end_dtypes: - dtypes.append(x) - dtypes.append((stop_name, 'S20')) - dtypes.append((count_name, np.int)) - column_data = np.empty(len(data) * nstops, dtype=dtypes) - for i in xrange(len(data)): - for name in column_data.dtype.names: - if name is stop_name: - column_data[name][i * nstops:(i + 1) * nstops] = \ - np.arange(0, nstops) - elif name is count_name: - column_data[name][i * nstops:(i + 1) * nstops] = \ - np.array(list(data[i]))[stop_col_num[j] : \ - -len(end_dtypes)] - else: - column_data[name][i * nstops:(i + 1) * nstops] = \ - data[name][i] - # Remove all zeros - column_data = column_data[column_data[count_name] != 0] - columnar_data.append(column_data) - self.Columnar_Object = Columnar_Data(columnar_data, archival=archival) - - def output_transect_data(self, filenames): - ''' - This function prints the data within self.columnar_data with the given - filenames. If self.columnar_data has not been filled, an error is - thrown. - - Parameters - ---------- - filenames : list - A list of filenames to which the data will be saved. Must be the - same length as self.columnar_data - - ''' - - assert len(filenames) == len(self.transect_data), "Number of filenames\ - must be the same as the number of datasets" - for i, data in self.transect_data: - ff.output_form(data, filenames[i]) - - -def remove_char(grid_list, char='\n'): - ''' - Removes the given char from the end of each cell in grid list - ''' - - for grid in grid_list: - for name in grid.dtype.names: - for i in xrange(len(grid[name])): - while grid[name][i][::-1].find(char) == 0: - grid[name][i] = grid[name][i][:-1] - - return grid_list - -def remove_white_spaces(grid_list): - ''' - Removes all of the white spaces from strings. 
- ''' - for grid in grid_list: - for name in grid.dtype.names: - for i in xrange(len(grid[name])): - grid[name][i] = ''.join(grid[name][i].split(' ')) - - return grid_list - -def replace_vals(filename, replace, delim=','): - ''' - Replace the values in filename with specified values in replace_values - - Parameters - ---------- - filename : string - Will be read into a rec array - - replace_values : tuple - First object is value to replace and second object is what to replace - it with - - - ''' - data = csv2rec(filename, delimiter=delim, missing=replace[0]) - for nm in data.dtype.names: - try: - # Missing float - isNaN = (np.isnan(data[nm])) - except: - isNaN = np.zeros(len(data[nm]), dtype=bool) - isBlank = np.array([it == '' for it in data[nm]]) - isMinusOne = (data[nm] == -1)# Missing int - # Missing other - isNone = np.array([i == None for i in data[nm]]) - ind = np.bitwise_or(isNaN, isBlank) - ind = np.bitwise_or(ind, isMinusOne) - ind = np.bitwise_or(ind, isNone) - data[nm][ind] = replace[1] - return data - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/macroeco/misc/xest_form_func.py b/macroeco/misc/xest_form_func.py deleted file mode 100644 index 71d7668..0000000 --- a/macroeco/misc/xest_form_func.py +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/python -#Testing form_func.py - -import unittest -from form_func import * -import numpy as np -import os -from matplotlib.mlab import csv2rec -gcwd = os.getcwd #get current directory -pd = os.path.dirname #get parent directory -chdir = os.chdir #change directories -jp = os.path.join - -class TestFormFunc(unittest.TestCase): - '''Tests the functions with in form_func.py''' - - def setUp(self): - self.spp_array1 = np.array(['as', 'as', 'as', 'as', 'as']) - self.spp_array2 = np.array([2,2,3,5,6,3,4,5,7,8]) - self.spp_array3 = np.array(['as','bn', 'as', 'ty', 'bn']) - self.spp_array4 = np.array([]) - - self.arch1 = open('arch1.csv', 'w') - self.arch1.write('''cell, row, column, AGR, THY, FTW, REW - 1, 1, 1, 0, 1, 1, 0 - 2, 1, 2, 3, 3, 0, 1 - 3, 2, 1, 0, 0, 0, 0 - 4, 2, 2, 1, 5, 1, 0''') - self.arch1.close() - self.arch2 = open('arch2.csv', 'w') - self.arch2.write('''cell, row, column, AGR, THY, FTW, REW - 1, 1, 1, 0, 1, 1, 0 - 2, 1, 2, 3, 3, 0, 1 - 3, 2, 1, 0, 0, 0, 0 - 4, 2, 2, 1, 5, 1, 0''') - self.arch2.close() - - def tearDown(self): - os.remove('arch1.csv') - os.remove('arch2.csv') - - def test_create_intcodes(self): - unq_specs = np.unique(self.spp_array1) - unq_ints = np.linspace(0, len(np.unique(self.spp_array1)) - 1,\ - num=len(np.unique(self.spp_array1))) - tot_int = create_intcodes(self.spp_array1, unq_specs, unq_ints) - self.assertTrue(len(tot_int) == 5) - self.assertTrue(np.unique(tot_int)[0] == .0) - self.assertTrue(np.all(np.equal(tot_int, np.array([.0,.0,.0,.0,.0])))) - unq_specs = np.unique(self.spp_array2) - unq_ints = np.linspace(0, len(np.unique(self.spp_array2)) - 1, \ - num=len(np.unique(self.spp_array2))) - tot_int = create_intcodes(self.spp_array2, unq_specs, unq_ints) - self.assertTrue(len(tot_int) == len(self.spp_array2)) - self.assertTrue(np.all(np.equal(np.unique(tot_int), - np.linspace(0,6,num=7)))) - self.assertRaises(AssertionError, create_intcodes, self.spp_array4, - unq_specs, unq_ints) - - def test_add_field(self): - data = csv2rec('arch1.csv') - data_added = add_field(data, [('test', np.int)]) - names = np.array(data_added.dtype.names) - self.assertTrue(sum(names == 'test') == 1) - - def test_merge_formatted(self): - data1 = csv2rec('arch1.csv') - 
data2 = csv2rec('arch2.csv') - dl = [data1, data2] - merged = merge_formatted(dl) - self.assertTrue(sum(merged['rew']) == 2) - self.assertTrue(sum(merged['column']) == 12) - - def test_add_data_fields(self): - data1 = csv2rec('arch1.csv') - data2 = csv2rec('arch2.csv') - dl = [data1, data2] - alt_data = add_data_fields(dl, {'year': (1998, 2002)}) - self.assertTrue(np.all(alt_data[0]['year'] == '1998')) - self.assertTrue(np.all(alt_data[1]['year'] == '2002')) - alt_data = add_data_fields(dl, {'year' : (1998, 2002), 'why': ('h', - 'a')}) - self.assertTrue(np.all(alt_data[0]['why'] == 'h')) - - def test_fractionate(self): - data1 = csv2rec('arch1.csv') - data2 = csv2rec('arch2.csv') - dl = [data1, data2] - fr = fractionate(dl, (10, 10), (5, 5), ['row', 'column']) - self.assertTrue(fr[0]['row'][3] == 5) - self.assertTrue(fr[1]['column'][2] == 0) - - def test_format_dense(self): - data1 = csv2rec('arch1.csv') - data2 = csv2rec('arch2.csv') - dl = [data1, data2] - form = format_dense(dl, 3, (4,4)) - self.assertTrue(np.all(form[0]['count'][:4] == np.array([1,1,3,3]))) - self.assertTrue(np.all(form[1]['count'] == - np.array([1,1,3,3,1,1,5,1]))) - - - - - - - - diff --git a/macroeco/misc/xest_format_data.py b/macroeco/misc/xest_format_data.py deleted file mode 100644 index b21cc71..0000000 --- a/macroeco/misc/xest_format_data.py +++ /dev/null @@ -1,475 +0,0 @@ -#!/usr/bin/python - -'''Testing the classes in format_data.py''' - -import unittest -import numpy as np -import format_data as form -import os -import glob -import copy -gcwd = os.getcwd #get current directory -pd = os.path.dirname #get parent directory -chdir = os.chdir #change directories -jp = os.path.join - -class TestFormatData(unittest.TestCase): - '''Tests the classes within format_data.py''' - - def setUp(self): - - self.grid1 = open('grid1.csv', 'w') - self.grid1.write('''Harry-1+Joshua - 6+, hg-4+ty - 67,\nHarry-3+Joshua-1+y-34+ty - 87, hg-23''') - self.grid1.close() - - # Heavily nested names and blank cell - self.grid2 = open('grid2.csv', 'w') - self.grid2.write('''aaaa - 5&aaaa - 4 & aaaa - 3, aa - 2&a - 5, - aaa - 4& aaaa- 3& a - 1, ''') - self.grid2.close() - - # Grid to be cut - self.grid3 = open('grid3.csv', 'w') - self.grid3.write('''aaaa - 5*&aaaa - 4* & aa*aa - *3*$please, aa* -2*&a - 5will I be cut 7658?, - aa*a -* 4*& aa*aa- 3*& a* - 1*%maybe, **''') - self.grid3.close() - - self.dense1 = open('dense1.csv', 'w') - self.dense1.write('''column, row, fry, the, eggs, well, please - 0,0,1,2,3,4,5 - 0,1,0,0,,0,23 - 1,0,,,5,45,0 - 1,1,1,1,1,1,1''') - self.dense1.close() - - self.dense2 = open('dense2.csv', 'w') - self.dense2.write('''column, row, fry, the, eggs, well, please - 0,0,1,2,3,4,5 - 0,1,0,0,NA,0,23 - 1,0,NA,NA,5,45,0 - 1,1,1,1,1,1,1''') - self.dense2.close() - - self.dense3 = open('dense3.csv', 'w') - self.dense3.write('''column, row, fry, the, eggs, well, please, j - 0,0,1,2,3,4,5,2 - 0,1,0,0,NA,0,23,5 - 1,0,NA,NA,5,45,0,6 - 1,1,1,1,1,1,1,7''') - self.dense3.close() - - self.dense4 = open('dense4.csv', 'w') - self.dense4.write('''column, row, fry, the, eggs, well, please, j,h - 0,0,1,2,3,4,5,2,t - 0,1,0,0,0,0,23,5,u - 1,0,1,0,5,45,0,6,k - 1,1,1,1,1,1,1,7,m''') - self.dense4.close() - - self.trans1 = open('trans1.csv', 'w') - self.trans1.write( -'''spp, island, tree, b1, b2, b3, b4, b5, nm, fun -h,Marta,1,1,2,3,4,5,j,56 -t,Marta,2,1,1,1,1,0,k,78 -h,Garry,1,2,3,4,5,6,j,123 -t,Garry,2,0,1,2,0,5,u,456''') - self.trans1.close() - - self.col1 = open('col1.csv', 'w') - self.col1.write('''SPP, x, y, dBh1, dbH%2, john 
-l,1,1,34,38,g -y,2,1,100,10,g -h,1,2,1,1,g -y,2,2,300,2,f''') - self.col1.close() - - self.col2 = open('col2.csv', 'w') - self.col2.write('''sp+P, x, y, dbh1, dbh2, joH%n -l,1,,34,38,g -y,2,1,100,10,g -h,,2,1,1,NA -y,2,1,300,2,f''') - self.col2.close() - - - - def tearDown(self): - os.remove('grid1.csv') - os.remove('grid2.csv') - os.remove('grid3.csv') - os.remove('dense1.csv') - os.remove('dense2.csv') - os.remove('dense3.csv') - os.remove('dense4.csv') - os.remove('trans1.csv') - os.remove('col1.csv') - os.remove('col2.csv') - - def test_Grid_Data(self): - grid = form.Grid_Data('grid1.csv', spp_sep='+') - grid.find_unique_spp_in_grid(spacer='-', spp_sep='+') - - # Does it find the right species? - spp_list = np.array(['Harry', 'Joshua', 'hg', 'ty', 'y']) - unq_spp = grid.unq_spp_lists[0] - self.assertTrue(np.all(spp_list == unq_spp)) - - # If I don't truncate '+', it still finds the right species - grid = form.Grid_Data('grid1.csv') - grid.find_unique_spp_in_grid(spacer='-', spp_sep='+') - - spp_list = np.array(['Harry', 'Joshua', 'hg', 'ty', 'y']) - unq_spp = grid.unq_spp_lists[0] - self.assertTrue(np.all(spp_list == unq_spp)) - - # Test that the Dense plot is made correctly - grid = form.Grid_Data('grid1.csv', spp_sep='+') - grid.grid_to_dense(spacer='-', spp_sep='+') - columns = ('cell', 'row', 'column', 'Harry', 'Joshua', 'hg', 'ty', - 'y') - test_names = grid.Dense_Object.dense_data[0].dtype.names - self.assertTrue(np.all(test_names == columns)) - - # Test that values are correct - dense_obj = grid.Dense_Object - pred = np.array([0,0,4,23]) - test = dense_obj.dense_data[0]['hg'] - self.assertTrue(np.all(pred == test)) - pred = np.array([1,3,0,0]) - test = dense_obj.dense_data[0]['Harry'] - self.assertTrue(np.all(pred == test)) - pred = np.array([6,1,0,0]) - test = dense_obj.dense_data[0]['Joshua'] - self.assertTrue(np.all(pred == test)) - pred = np.array([0,34,0,0]) - test = dense_obj.dense_data[0]['y'] - self.assertTrue(np.all(pred == test)) - pred = np.array([0,87,67,0]) - test = dense_obj.dense_data[0]['ty'] - self.assertTrue(np.all(pred == test)) - - # Tested heavy nesting and empty cell - grid = form.Grid_Data('grid2.csv', 2) - grid.find_unique_spp_in_grid(spacer='-', spp_sep='&') - unq_spp = np.array(['a', 'aa', 'aaa', 'aaaa']) - pred = grid.unq_spp_lists[0] - self.assertTrue(np.all(unq_spp == pred)) - - grid.grid_to_dense(spacer='-', spp_sep='&') - dense_obj = grid.Dense_Object - pred = np.array([0,1,5, 0]) - test = dense_obj.dense_data[0]['a'] - self.assertTrue(np.all(pred == test)) - pred = np.array([0,0,2, 0]) - test = dense_obj.dense_data[0]['aa'] - self.assertTrue(np.all(pred == test)) - pred = np.array([0,4,0, 0]) - test = dense_obj.dense_data[0]['aaa'] - self.assertTrue(np.all(pred == test)) - pred = np.array([12,3,0, 0]) - test = dense_obj.dense_data[0]['aaaa'] - self.assertTrue(np.all(pred == test)) - - # Testing remove, replace, and truncation functions - grid = form.Grid_Data('grid3.csv', spp_sep='&') - grid.truncate_grid_cells(['$pl', 'will', '%may']) - grid.remove_and_replace('*', '') - - grid.find_unique_spp_in_grid(spacer='-', spp_sep='&') - unq_spp = np.array(['a', 'aa', 'aaa', 'aaaa']) - pred = grid.unq_spp_lists[0] - self.assertTrue(np.all(unq_spp == pred)) - - grid.grid_to_dense(spacer='-', spp_sep='&') - dense_obj = grid.Dense_Object - pred = np.array([0,1,5, 0]) - test = dense_obj.dense_data[0]['a'] - self.assertTrue(np.all(pred == test)) - pred = np.array([0,0,2, 0]) - test = dense_obj.dense_data[0]['aa'] - self.assertTrue(np.all(pred == test)) - pred 
= np.array([0,4,0, 0]) - test = dense_obj.dense_data[0]['aaa'] - self.assertTrue(np.all(pred == test)) - pred = np.array([12,3,0, 0]) - test = dense_obj.dense_data[0]['aaaa'] - self.assertTrue(np.all(pred == test)) - - # Testing reset to archival - grid.reset_grid_data() - temp_str = 'aaaa-5*&aaaa-4*&aa*aa-*3*$please' - data_str = grid.grid_data[0]['0'][0] - self.assertTrue(temp_str == data_str) - - - # Test that multiple data sets work - - grid = form.Grid_Data(glob.glob('grid*.csv'), archival=False) - - # reset_archival should fail in this case - self.assertRaises(ValueError, grid.reset_grid_data) - - # All the truncation should make the the two data sets equal - grid.truncate_grid_cells(['$pl', 'will', '%may']) - grid.remove_and_replace('*', '') - for col in xrange(grid.cols[0]): - for row in xrange(len(grid.grid_data[0])): - self.assertTrue(grid.grid_data[1][col][row] ==\ - grid.grid_data[2][col][row]) - - - def test_Dense_Data(self): - - # Test that the expected values are read in - dense = form.Dense_Data('dense1.csv', replace=('', 0)) - spp_arr = np.array([1,0,0,1]) - read_in = dense.dense_data[0]['fry'] - self.assertTrue(np.all(spp_arr == read_in)) - spp_arr = np.array([2,0,0,1]) - read_in = dense.dense_data[0]['the'] - self.assertTrue(np.all(spp_arr == read_in)) - spp_arr = np.array([3,0,5,1]) - read_in = dense.dense_data[0]['eggs'] - self.assertTrue(np.all(spp_arr == read_in)) - spp_arr = np.array([4,0,45,1]) - read_in = dense.dense_data[0]['well'] - self.assertTrue(np.all(spp_arr == read_in)) - spp_arr = np.array([5,23,0,1]) - read_in = dense.dense_data[0]['please'] - self.assertTrue(np.all(spp_arr == read_in)) - - # NAs should all be turned to 0's - dense = form.Dense_Data('dense1.csv', replace=('NA', 0)) - spp_arr = np.array([1,0,0,1]) - read_in = dense.dense_data[0]['fry'] - self.assertTrue(np.all(spp_arr == read_in)) - spp_arr = np.array([2,0,0,1]) - read_in = dense.dense_data[0]['the'] - self.assertTrue(np.all(spp_arr == read_in)) - spp_arr = np.array([3,0,5,1]) - read_in = dense.dense_data[0]['eggs'] - self.assertTrue(np.all(spp_arr == read_in)) - spp_arr = np.array([4,0,45,1]) - read_in = dense.dense_data[0]['well'] - self.assertTrue(np.all(spp_arr == read_in)) - spp_arr = np.array([5,23,0,1]) - read_in = dense.dense_data[0]['please'] - self.assertTrue(np.all(spp_arr == read_in)) - - # Test dense_to_columnar - dense = form.Dense_Data(['dense2.csv', 'dense3.csv'], replace=('NA',0)) - dense.dense_to_columnar(2, (5,6)) - col = dense.Columnar_Object - col.merge_data() - unq_spp = np.unique(['eggs', 'fry', 'the', 'well', 'please', 'j']) - pred_unq_spp = np.unique(col.merged_data['spp']) - self.assertTrue(np.all(unq_spp == pred_unq_spp)) - count = [1,2,3,4,5] - self.assertTrue(np.all(count == col.merged_data['count'][:5])) - count = [1,1,1,1,1,7] - self.assertTrue(np.all(count == col.merged_data['count'][-6:])) - self.assertTrue(len(col.merged_data) == 30) - self.assertTrue(col.merged_data['count'][5] == 23) - self.assertTrue(col.merged_data['spp'][5] == 'please') - - # Test correct extension of num_spp - dense = form.Dense_Data(['dense2.csv', 'dense2.csv'], replace=('NA',0)) - self.assertRaises(TypeError, dense.dense_to_columnar, 2, (5,6,7)) - dense.dense_to_columnar(2, 5) - col = dense.Columnar_Object - count = np.array([1,2,3,4,5]) - self.assertTrue(np.all(col.columnar_data[0]['count'][:5] == count)) - self.assertTrue(np.all(col.columnar_data[1]['count'][:5] == count)) - - # Test trailing column after species - dense = form.Dense_Data(['dense4.csv']) - 
dense.dense_to_columnar(2, 5) - col = dense.Columnar_Object - comp = np.array([2,2,2,2,2,5,6,6,6,7,7,7,7,7]) - self.assertTrue(np.all(comp == col.columnar_data[0]['j'])) - comp = np.array(['t', 't', 't', 't', 't', 'u', 'k', 'k', 'k', 'm', 'm', - 'm', 'm', 'm']) - self.assertTrue(np.all(comp == col.columnar_data[0]['h'])) - - def test_Transect_Data(self): - - - # Already tested replace_vals test_Dense_Data - trans = form.Transect_Data('trans1.csv', replace=('0', 1)) - trans.transect_to_columnar(3, 5) - col = trans.Columnar_Object - count = np.array([1,2,3,4,5,1,1,1,1,1,2,3,4,5,6,1,1,2,1,5]) - self.assertTrue(np.all(count == col.columnar_data[0]['count'])) - - # Test that transect data reads in correctly and converts to columnar - trans = form.Transect_Data('trans1.csv') - trans.transect_to_columnar(3, 5) - col = trans.Columnar_Object - count = np.array([1,2,3,4,5,1,1,1,1,2,3,4,5,6,1,2,5]) - self.assertTrue(np.all(count == col.columnar_data[0]['count'])) - - # Test multiple datasets are converted correctly - trans = form.Transect_Data(['trans1.csv', 'trans1.csv']) - trans.transect_to_columnar(3,5) - col = trans.Columnar_Object - col.merge_data() - self.assertTrue(np.all(np.concatenate((count, count)) == - col.merged_data['count'])) - def test_Columnar_Data(self): - - # Testing missing values - col = form.Columnar_Data('col2.csv', missingd={'y' : '', 'x' : '', - 'john' : 'NA'}, delete_missing=True) - self.assertTrue(len(col.columnar_data[0]) == 2) - self.assertTrue(np.all(col.columnar_data[0]['spp'] == np.array(['y', - 'y']))) - self.assertTrue(np.all(col.columnar_data[0]['dbh1'] == np.array([100, - 300]))) - - # No missing values; Test subsetting - col = form.Columnar_Data('col1.csv') - col.subset_data({'JOHN' : ('!=', 'f')}) - self.assertTrue(np.all(col.columnar_data[0]['john'] == np.array(['g', - 'g', 'g']))) - # Test reset - col.reset_columnar_data() - check = np.array(['g','g','g','f']) - self.assertTrue(np.all(col.columnar_data[0]['john'] == check)) - - # Test splitting - col.split_up_data_by_field([('D&Bh1',), ('dbh2',)]) - self.assertTrue(len(col.columnar_data) == 2) - dbh1 = np.array([34,100,1,300]) - dbh2 = np.array([38,10,1,2]) - try: - col.columnar_data[0]['dbh2'] - except ValueError: - pass - - try: - col.columnar_data[1]['dbh1'] - except ValueError: - pass - - self.assertTrue(np.all(col.columnar_data[0]['dbh1'] == dbh1)) - self.assertTrue(np.all(col.columnar_data[1]['dbh2'] == dbh2)) - - col.reset_columnar_data() - - col.split_up_data_by_field([('spp', 'x'), ('y',)]) - self.assertTrue(len(col.columnar_data) == 2) - td1 = np.array(['spp', 'x', 'dbh1', 'dbh2', 'john']) - td2 = np.array(['y', 'dbh1', 'dbh2', 'john']) - d1 = np.array(col.columnar_data[0].dtype.names) - d2 = np.array(col.columnar_data[1].dtype.names) - self.assertTrue(np.all(d1 == td1)) - self.assertTrue(np.all(d2 == td2)) - - # Test change column names - col.reset_columnar_data() - col.split_up_data_by_field([('dbh1',), ('dbh2',)]) - self.assertRaises(ValueError,col.change_column_names, [('x', 'y')], - ['hello']) - - col.reset_columnar_data() - col.split_up_data_by_field([('dbh1',), ('dbh2',)]) - col.change_column_names([('dbh1', 'dbh2'), ('john',)], ['dbh', 'harry']) - nms = np.array(['spp', 'x', 'y', 'dbh', 'harry']) - dtnms1 = np.array(col.columnar_data[0].dtype.names) - dtnms2 = np.array(col.columnar_data[1].dtype.names) - self.assertTrue(np.all(nms == dtnms1)) - self.assertTrue(np.all(nms == dtnms2)) - - # Test if long names added - col.reset_columnar_data() - col.split_up_data_by_field([('dbh1',), ('dbh2',)]) 
- col.change_column_names([('dbh1', 'dbh2')], ['goofy_chew']) - nms = np.array(['spp', 'x', 'y', 'goofy_chew', 'john']) - dtnms1 = np.array(col.columnar_data[0].dtype.names) - dtnms2 = np.array(col.columnar_data[1].dtype.names) - self.assertTrue(np.all(nms == dtnms1)) - self.assertTrue(np.all(nms == dtnms2)) - - # Test adding fields to data - col.reset_columnar_data() - col.split_up_data_by_field([('dbh1',), ('dbh2',)]) - col.change_column_names([('dbh1', 'dbh2')], ['dbh']) - col.add_fields_to_data_list({'year' : (1998, 2001), 'body' : ('large', - 'small')}) - year1 = np.repeat('1998', 4) - year2 = np.repeat('2001', 4) - body1 = np.repeat('large', 4) - body2 = np.repeat('small', 4) - self.assertTrue(np.all(year1 == col.columnar_data[0]['year'])) - self.assertTrue(np.all(year2 == col.columnar_data[1]['year'])) - self.assertTrue(np.all(body1 == col.columnar_data[0]['body'])) - self.assertTrue(np.all(body2 == col.columnar_data[1]['body'])) - - # Test adding different dtypes - col.reset_columnar_data() - col.split_up_data_by_field([('dbh1',), ('dbh2',)]) - col.change_column_names([('dbh1', 'dbh2')], ['dbh']) - col.add_fields_to_data_list({'year' : (1998, 2001), 'body' : ('large', - 'small')}, descr={'year': np.int, - 'body': 'S20'}) - - year1 = np.repeat(1998, 4) - year2 = np.repeat(2001, 4) - body1 = np.repeat('large', 4) - body2 = np.repeat('small', 4) - self.assertTrue(np.all(year1 == col.columnar_data[0]['year'])) - self.assertTrue(np.all(year2 == col.columnar_data[1]['year'])) - self.assertTrue(np.all(body1 == col.columnar_data[0]['body'])) - self.assertTrue(np.all(body2 == col.columnar_data[1]['body'])) - - # Test remove columns - col = form.Columnar_Data(['col1.csv', 'col2.csv'], missingd={'y' : '', - 'x' : '', 'john' : 'NA'}, delete_missing=True) - self.assertTrue(len(col.columnar_data[0]) == 4) - self.assertTrue(len(col.columnar_data[1]) == 2) - col.remove_columns('john') - test_nm = set(['x','y', 'spp', 'dbh1', 'dbh2']) - self.assertTrue(test_nm == set(col.columnar_data[0].dtype.names)) - self.assertTrue(test_nm == set(col.columnar_data[1].dtype.names)) - - col.remove_columns(['x', 'y']) - test_nm = set(['spp', 'dbh1', 'dbh2']) - self.assertTrue(test_nm == set(col.columnar_data[0].dtype.names)) - self.assertTrue(test_nm == set(col.columnar_data[1].dtype.names)) - - # Try removing row that is not there, no error is thrown - col.remove_columns(['x']) - self.assertTrue(test_nm == set(col.columnar_data[0].dtype.names)) - self.assertTrue(test_nm == set(col.columnar_data[1].dtype.names)) - - # Fractionate is tested in test_form_func.py. Test if wid_len_old etc. 
- # parameters return expected results - col.reset_columnar_data() - temp_col = copy.deepcopy(col.columnar_data) - col.columnar_data = [temp_col[0]] - col.fractionate_data((1,1), (.5,.5), ('x', 'y')) - self.assertTrue(np.all(np.array([0,.5,0,.5]) == - col.columnar_data[0]['x'])) - col.columnar_data = [temp_col[1]] - col.fractionate_data((1,1), (.5,.5), ('x', 'y'), (2,2), (1,1), (1,1)) - self.assertTrue(np.all(np.array([.5,.5]) == - col.columnar_data[0]['x'])) - self.assertTrue(np.all(np.array([0,0]) == - col.columnar_data[0]['y'])) - - col.columnar_data = temp_col - - # Test merge data - col.merge_data() - self.assertTrue(len(col.merged_data) == 6) - spp = np.array(['l','y','h','y','y','y']) - self.assertTrue(np.all(col.merged_data['spp'] == spp)) - dbh2 = np.array([34,100,1,300,100,300]) - self.assertTrue(np.all(col.merged_data['dbh1'] == dbh2)) - - # Try to break merge data - col.columnar_data = [col.merged_data] - col.merge_data() - -if __name__ == '__main__': - unittest.main() From 040a6d0a0acd74ce14cd6dc304d360abffcfd1a4 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 9 Apr 2014 15:52:29 -0700 Subject: [PATCH 221/343] Add METE SAR and EAR --- macroeco/empirical/__init__.py | 2 +- macroeco/empirical/empirical.py | 155 ++++++++------------- macroeco/main/main.py | 68 +++++----- macroeco/models/__init__.py | 4 +- macroeco/models/_curves.py | 219 ++++++++++++++++++++++++++---- macroeco/models/_distributions.py | 6 +- 6 files changed, 294 insertions(+), 160 deletions(-) diff --git a/macroeco/empirical/__init__.py b/macroeco/empirical/__init__.py index ba0ba04..48c7070 100644 --- a/macroeco/empirical/__init__.py +++ b/macroeco/empirical/__init__.py @@ -31,5 +31,5 @@ """ from .empirical import (Patch, - sad, ssad, sar, ear, comm_grid, + sad, ssad, sar, comm_grid, empirical_cdf) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/empirical.py index 317e8a3..4b43f9a 100644 --- a/macroeco/empirical/empirical.py +++ b/macroeco/empirical/empirical.py @@ -22,7 +22,7 @@ metric_params = \ """patch : Patch obj Patch object containing data for analysis - cols : dict + cols : str Indicates which column names in patch data table are associated with species identifiers, counts, energy, and mass. See Notes. splits : str @@ -416,19 +416,17 @@ def ssad(patch, cols, splits): @log_start_end @doc_sub(metric_params, metric_return, cols_note, splits_note, division_note) -def sar(patch, cols, splits, divs): +def sar(patch, cols, splits, divs, ear=False): """ - Calculates an empirical species area relationship + Calculates an empirical species area or endemics area relationship Parameters ---------- {0} divs : str Description of how to divide x_col and y_col. See notes. - full_output : bool - If True, tuples in result contain a third element with a row for each - area (as in the main result) and columns containing richness for all - suplots at that division. + ear : bool + If True, calculates an endemics area relationship Returns ------- @@ -440,8 +438,8 @@ def sar(patch, cols, splits, divs): ----- {2} - For the SAR, cols must also contain x_col and y_col, giving the x and y - dimensions along which to grid the patch. + For the SAR and EAR, cols must also contain x_col and y_col, giving the x + and y dimensions along which to grid the patch. 
{3} @@ -449,97 +447,61 @@ def sar(patch, cols, splits, divs): """ - (spp_col, x_col, y_col), patch = \ - _get_cols(['spp_col', 'x_col', 'y_col'], cols, patch) - - # Loop through each split - result_list = [] - for substring, subpatch in _yield_subpatches(patch, splits): - - # Loop through all divisions within this split - subresultx = [] - subresulty = [] - subdivlist = _split_divs(divs) - for divs in subdivlist: - spatial_table = _yield_spatial_table(subpatch, divs, spp_col, - x_col, y_col) - subresulty.append(np.mean(spatial_table['spp_count'])) # n spp - subresultx.append(1 / eval(divs.replace(',', '*'))) # a frac - - # Append subset result - subresult = pd.DataFrame({'div': subdivlist, 'x': subresultx, - 'y': subresulty}) - result_list.append((substring, subresult)) + def sar_y_func(spatial_table, all_spp): + return np.mean(spatial_table['n_spp']) + + def ear_y_func(spatial_table, all_spp): + endemic_counter = 0 + for spp in all_spp: + spp_in_cell = [spp in x for x in spatial_table['spp_set']] + spp_n_cells = np.sum(spp_in_cell) + if spp_n_cells == 1: # If a spp is in only 1 cell, endemic + endemic_counter += 1 + n_cells = len(spatial_table) + return endemic_counter / n_cells # mean endemics / cell + + if ear: + y_func = ear_y_func + else: + y_func = sar_y_func - # Return all results - return result_list + return _sar_ear_inner(patch, cols, splits, divs, y_func) -@log_start_end -@doc_sub(metric_params, metric_return, cols_note, splits_note, division_note) -def ear(patch, cols, splits, divs): +def _sar_ear_inner(patch, cols, splits, divs, y_func): """ - Calculates an empirical endemics area relationship - - Parameters - ---------- - {0} - divisions : str - Description of how to divide x_col and y_col. See notes. - - Returns - ------- - {1} Result has three columns, div, x, and y, that give the ID for the - division given as an argument, fractional area, and the mean number of - endemics per cell at that division. - - Notes - ----- - {2} - - For the EAR, cols must also contain x_col and y_col, giving the x and y - dimensions along which to grid the patch. 
- - {3} - - {4} - + y_func is function calculating the mean number of species or endemics, + respectively, for the SAR or EAR """ - (spp_col, x_col, y_col), patch = \ - _get_cols(['spp_col', 'x_col', 'y_col'], cols, patch) + (spp_col, count_col, x_col, y_col), patch = \ + _get_cols(['spp_col', 'count_col', 'x_col', 'y_col'], cols, patch) # Loop through each split result_list = [] for substring, subpatch in _yield_subpatches(patch, splits): - all_spp = np.unique(subpatch.table[spp_col]) - # Loop through all divisions within this split + all_spp = np.unique(subpatch.table[spp_col]) subresultx = [] subresulty = [] + subresultnspp = [] + subresultnindivids = [] subdivlist = _split_divs(divs) - for divs in subdivlist: - spatial_table = _yield_spatial_table(subpatch, divs, spp_col, - x_col, y_col) - - endemic_counter = 0 - for spp in all_spp: - spp_in_cell = [spp in x for x in spatial_table['spp_set']] - spp_n_cells = np.sum(spp_in_cell) - if spp_n_cells == 1: # If a spp is in only 1 cell, endemic - endemic_counter += 1 - - n_cells = len(spatial_table) - subresulty.append(endemic_counter / n_cells) # mean endemics / cell - subresultx.append(1 / eval(divs.replace(',', '*'))) # a frac + for subdiv in subdivlist: + spatial_table = _yield_spatial_table(subpatch, subdiv, spp_col, + count_col, x_col, y_col) + subresulty.append(y_func(spatial_table, all_spp)) + subresultx.append(1 / eval(subdiv.replace(',', '*'))) # a frac + subresultnspp.append(np.mean(spatial_table['n_spp'])) + subresultnindivids.append(np.mean(spatial_table['n_individs'])) # Append subset result subresult = pd.DataFrame({'div': subdivlist, 'x': subresultx, - 'y': subresulty}) + 'y': subresulty, 'n_spp': subresultnspp, + 'n_individs': subresultnindivids}) result_list.append((substring, subresult)) - # Return all results return result_list @@ -584,8 +546,8 @@ def comm_grid(patch, cols, splits, divs, metric='Sorensen'): """ - (spp_col, x_col, y_col), patch = \ - _get_cols(['spp_col', 'x_col', 'y_col'], cols, patch) + (spp_col, count_col, x_col, y_col), patch = \ + _get_cols(['spp_col', 'count_col', 'x_col', 'y_col'], cols, patch) # Loop through each split result_list = [] @@ -593,10 +555,10 @@ def comm_grid(patch, cols, splits, divs, metric='Sorensen'): # Get spatial table and break out columns spatial_table = _yield_spatial_table(subpatch, divs, spp_col, - x_col, y_col) + count_col, x_col, y_col) spp_set = spatial_table['spp_set'] cell_loc = spatial_table['cell_loc'] - spp_count = spatial_table['spp_count'] + n_spp = spatial_table['n_spp'] # Get all possible pairwise combinations of cells pair_list = [] @@ -611,7 +573,7 @@ def comm_grid(patch, cols, splits, divs, metric='Sorensen'): ij_intersect = spp_set[i] & spp_set[j] if metric.lower() == 'sorensen': - comm = 2*len(ij_intersect) / (spp_count[i]+spp_count[j]) + comm = 2*len(ij_intersect) / (n_spp[i] + n_spp[j]) elif metric.lower() == 'jaccard': comm = len(ij_intersect) / len(spp_set[i] | spp_set[j]) else: @@ -628,7 +590,7 @@ def comm_grid(patch, cols, splits, divs, metric='Sorensen'): return result_list -def _yield_spatial_table(patch, div, spp_col, x_col, y_col): +def _yield_spatial_table(patch, div, spp_col, count_col, x_col, y_col): """ Calculates an empirical spatial table @@ -642,7 +604,7 @@ def _yield_spatial_table(patch, div, spp_col, x_col, y_col): The spatial table is the precursor to the SAR, EAR, and grid-based commonality metrics. Each row in the table corresponds to a cell created by a given division. 
Columns are cell_loc (within the grid defined by the - division), spp_count, and spp_set. + division), spp_set, n_spp, and n_individs. """ @@ -651,7 +613,7 @@ def _yield_spatial_table(patch, div, spp_col, x_col, y_col): y_col + ':' + div_split_list[1]) # Get cell_locs - # Requires _parse_splits and _product functions to go through x then y + # Requires _parse_splits and _product functions to go y inside of x x_starts, x_ends = _col_starts_ends(patch, x_col, div_split_list[0]) x_offset = (x_ends[0] - x_starts[0]) / 2 x_locs = x_starts + x_offset @@ -663,16 +625,19 @@ def _yield_spatial_table(patch, div, spp_col, x_col, y_col): cell_locs = _product(x_locs, y_locs) # Get spp set and count for all cells - spp_count_list = [] # Number of species in cell + n_spp_list = [] # Number of species in cell + n_individs_list = [] spp_set_list = [] # Set object giving unique species IDs in cell - for cellstring, cellpatch in _yield_subpatches(patch, div_split): + for cellstring, cellpatch in _yield_subpatches(patch,div_split,name='div'): spp_set = set(np.unique(cellpatch.table[spp_col])) spp_set_list.append(spp_set) - spp_count_list.append(len(spp_set)) + n_spp_list.append(len(spp_set)) + n_individs_list.append(np.sum(cellpatch.table[count_col])) # Create and return dataframe - df = pd.DataFrame({'cell_loc': cell_locs, 'spp_count': spp_count_list, - 'spp_set': spp_set_list}) + df = pd.DataFrame({'cell_loc': cell_locs, 'spp_set': spp_set_list, + 'n_spp': n_spp_list, 'n_individs': n_individs_list}) + return df @@ -1267,7 +1232,7 @@ def _get_cols(special_col_names, cols, patch): return tuple(result), patch @doc_sub(splits_note) -def _yield_subpatches(patch, splits): +def _yield_subpatches(patch, splits, name='split'): """ Iterator for subtables defined by a splits string @@ -1292,7 +1257,7 @@ def _yield_subpatches(patch, splits): if splits: subset_list = _parse_splits(patch, splits) for subset in subset_list: - log.info('Analyzing subset: %s' % subset) + log.info('Analyzing subset %s: %s' % (name, subset)) subpatch = copy.copy(patch) subpatch.table = _subset_table(patch.table, subset) subpatch.meta = _subset_meta(patch.meta, subset) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 05b36c0..282acec 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -203,7 +203,7 @@ def _get_args_kwargs(options, module): if module == 'emp': options = _emp_extra_options(options) - arg_names, kw_names = _arg_kwarg_lists(options, module) + arg_names, kw_names = _arg_kwarg_lists(module, options['analysis']) # Create list of values for arg_names args = [] @@ -251,6 +251,7 @@ def _emp_extra_options(options): if not os.path.isfile(metadata_path): raise IOError, ("Path to metadata file %s is invalid." 
% metadata_path) + options['metadata_path'] = metadata_path # Using subset if given, create and store patch subset = options.get('subset', '') @@ -265,11 +266,11 @@ def _emp_extra_options(options): return options -def _arg_kwarg_lists(options, module): +def _arg_kwarg_lists(module, analysis): # Get names of args and kwargs to method specified by analysis option exec ("arg_and_kwd_names, _, _, kw_defaults = " - "inspect.getargspec(%s.%s)" % (module, options['analysis'])) + "inspect.getargspec(%s.%s)" % (module, analysis)) if kw_defaults: # If there are kwargs arg_names = arg_and_kwd_names[:-len(kw_defaults)] kw_names = arg_and_kwd_names[-len(kw_defaults):] @@ -280,7 +281,7 @@ def _arg_kwarg_lists(options, module): # Inspection for rv classes doesn't work since it uses args internally # Unless method is translate_args or fit_mle, appends shapes to args try: - obj_meth = options['analysis'].split('.') + obj_meth = analysis.split('.') if obj_meth[1] not in ['fit_mle', 'translate_args']: arg_names += eval(module+'.'+obj_meth[0]+'.'+"shapes.split(',')") except: @@ -316,6 +317,7 @@ def _fit_models(options, core_results): """ + log.info("Fitting models") models = options['models'].replace(' ', '').split(';') # TODO: Make work for 2D results, i.e., curves, comm_sep, o_ring @@ -324,7 +326,7 @@ def _fit_models(options, core_results): for core_result in core_results: # Each subset fit_result = {} for model in models: - fits = _get_fits(core_result, model) + fits = _get_fits(core_result, model, options) values = _get_values(core_result, model, fits) stat_names, stats = _get_comparison_stat(core_result, values, model, fits) @@ -334,28 +336,35 @@ def _fit_models(options, core_results): return fit_results -def _get_fits(core_result, model): +def _get_fits(core_result, model, options): - y = core_result[1]['y'].values - try: - result = eval("mod.%s.fit_mle(y)" % model) - except: - x = core_result[1]['x'].values - result = eval("mod.%s.fit_lsq(x, y)" % model) - return result + options_copy = {} + for key, val in options.iteritems(): + if key not in ['patch']: # Ignore patch since won't deepcopy + options_copy[key] = copy.deepcopy(val) + + model_obj = eval('mod.' + model) + if hasattr(model_obj, 'fit_mle'): + options_copy['analysis'] = model + '.' + 'fit_mle' + options_copy['data'] = core_result[1]['y'].values + else: + options_copy['analysis'] = model + '.' + 'fit_lsq' + options_copy['x'] = core_result[1]['x'].values + options_copy['y_obs'] = core_result[1]['y'].values + options_copy['df'] = core_result[1] # Entire result df, for mete_sar + + return _call_analysis_function(options_copy, 'mod') def _get_values(core_result, model, fits): - try: + model_obj = eval('mod.' + model) + if hasattr(model_obj, 'vals'): x = core_result[1]['x'].values # Calc model at x values values = eval("mod.%s.vals(x, *fits)" % model) - except: - x = core_result[1]['y'].values # Calc model at data values - try: - values = eval("mod.%s.pdf(x, *fits)" % model) - except AttributeError: - values = eval("mod.%s.pmf(x, *fits)" % model) + else: + n = len(core_result[1]) # Calc model at data values + values = eval("mod.%s.rank(n, *fits)" % model) return values @@ -502,6 +511,7 @@ def _write_comparison_plot_table(spid, models, options, core_results, also given. """ + # TODO: Clean up sorting, may not work if SAR x out of order, e.g. 
is_curve = 'x' in core_results[0][1] core_result = core_results[spid][1] @@ -519,12 +529,7 @@ def _write_comparison_plot_table(spid, models, options, core_results, # Add residual column for each model for model in models: fit_result = fit_results[spid][model] - shapes = fit_result[0] - if is_curve: - result = eval("mod.%s.vals(df['x'].values, *shapes)" - % model)['y'].values - else: - result = eval("mod.%s.rank(len(df['x']), *shapes)" % model)[::-1] + result = fit_result[1]['y'].values[::-1] df[model] = result df[model + "_residual"] = result - df['empirical'] @@ -538,30 +543,31 @@ def _write_comparison_plot_table(spid, models, options, core_results, # Save plot fig, (ax1, ax2) = plt.subplots(1, 2) - ax1.plot(df['x'], df[models]) ax1.scatter(df['x'], df['empirical'], color='k') + ax1.plot(df['x'], df[models]) ax1.legend(models + ['empirical'], loc='best') ax1.set_xlabel('x') ax1.set_ylabel('value') - ax2.plot(df['x'], df[[x + '_residual' for x in models]]) ax2.hlines(0, np.min(df['x']), np.max(df['x'])) + ax2.plot(df['x'], df[[x + '_residual' for x in models]]) ax2.legend(models + ['empirical'], loc='best') - #ax2.set_ylim((-1 * np.max(df['empirical']), np.max(df['empirical']))) ax2.set_xlabel('x') ax2.set_ylabel('residual') + ax2.set_xlim(ax1.get_xlim()) + ax2.set_ylim(min(ax2.get_ylim()[0], -1), max(ax2.get_ylim()[1], 1)) if options.get('log_y', None): ax1.set_yscale('log') if options.get('log_x', None): ax1.set_xscale('log') + ax2.set_xscale('log') if not options.get('log_x', None) and not options.get('log_y', None): ax1.set_ylim(bottom=0) ax1.set_xlim(left=0) ax1 = _pad_plot_frame(ax1) - - ax2 = _pad_plot_frame(ax2) + ax2 = _pad_plot_frame(ax2) with warnings.catch_warnings(): warnings.simplefilter("ignore") diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py index fe9033a..64a83ea 100644 --- a/macroeco/models/__init__.py +++ b/macroeco/models/__init__.py @@ -41,4 +41,6 @@ from _distributions import (geom, geom_uptrunc, nbinom, cnbinom, logser_uptrunc, expon, expon_uptrunc) -from ._curves import (power_law) +from ._curves import (power_law, + mete_sar, mete_iterative_sar, + mete_ear, mete_iterative_ear) diff --git a/macroeco/models/_curves.py b/macroeco/models/_curves.py index a8178c1..8cad480 100644 --- a/macroeco/models/_curves.py +++ b/macroeco/models/_curves.py @@ -5,6 +5,7 @@ from scipy import optimize from ..misc import inherit_docstring_from +import _distributions as dist _doc_methods = \ """Methods @@ -47,9 +48,8 @@ def vals(self, x, *args, **kwargs): [Docstring] """ - self.vals_kwargs = kwargs x = np.array(x) - y = self._vals(x, *args) + y = self._vals(x, *args, **kwargs) return pd.DataFrame({'x': x, 'y': y}) def _vals(self, x, *args): @@ -143,41 +143,131 @@ def _vals(self, x, c, z): power_law.__doc__ = power_law.__doc__.format(_doc_methods, _doc_parameters) -class gen_sar_gen(curve): +class generic_sar_gen(curve): """ - INCOMPLETE NEEDS CONTINUED WORK - A generic SAR based on a combination of an SAD and SSAD .. math:: - y = c x^z + S = c x^z The generic SAR may be used either for downscaling, when values of A are less than A0, or upscaling, when values of A are greater than A0. Downscaling creates the traditional SAR known to ecologists, while - wpscaling is particularly useful for estimating large-scale species - richness from small-scale plot data. + upscaling is useful for estimating large-scale species richness from small- + scale plot data. A keyword argument iterative is available for the generic SAR (default is False). 
If True, the SAR is calculated at successive A values, with the - result at each value of A used as the base values of S0 and N0 for the - subsequent calculation. The iterative SAR form is a generalization of the - universal SAR proposed by Harte et al [#]_. + result at each value of A used as the base values of S and N for the + subsequent calculation. The generic iterative SAR form is a generalization + of the universal SAR proposed by Harte et al [#]_. Methods ------- - vals(S0, N0, A, SAD_model, SSAD_model) + vals(x, S0, N0, SAD_model, SSAD_model) Calculate SAR given starting values and two models. See notes. Parameters ---------- + x : iterable + Areas at which to calculate SAR (first element is A0) + S0 : float + Species richness at A0 + SAD_model : object + Frozen distribution from macroeco.models + SSAD_model : object + Frozen distribution from macroeco.models + tol : float + Stop calculation when 1 - tol of pdf of SAD has been evaluated. Since + the SSAD is generally decreasing at high SAD values, this is nearly + always an upper limit on the error in the SAR. + + Notes + ----- + The SAR results here are an underestimate, determined by the value of tol, + as the summation is carried out only the cdf of the SAD reaches 1-tol. + + Notes + ----- + The frozen distributions SAD_model and SSAD_model should generally be + frozen with parameters applicable to the base scale at which S0 is + measured. + + References + ---------- + .. [#] + Harte, J., Smith, A. B., & Storch, D. (2009). Biodiversity scales from + plots to biomes with a universal species-area curve. Ecology Letters, + 12(8), 789-797. + + """ + + def _vals(self, x, S0, SAD_model, SSAD_model): + # x is area, y is S + + A0 = x[0] + y = [S0] + + for A in x[1:]: + a = A/A0 + + if a == 1: + S1 = S0 + elif a < 1: + S1 = self._downscale_step(a, SAD_model, SSAD_model) + else: + S1 = self._upscale_step(a, SAD_model, SSAD_model) + + y.append(S1) + + return np.array(y) + + def _downscale_step(self, a, SAD_model, SSAD_model): + pass + + def _upscale_step(self, a, SAD_model, SSAD_model): + pass + + def fit_lsq(self, patch, cols, SAD_model_name, SSAD_model_name): + raise NotImplementedError, ("fit method not available for generic sar") + +generic_sar = generic_sar_gen(name='generic_sar', parameters='') + + +class mete_sar_gen(curve): + """ + The SAR predicted by the Maximum Entropy Theory of Ecology + + .. math:: + + S = c x^z + + The generic SAR may be used either for downscaling, when values of A are + less than A0, or upscaling, when values of A are greater than A0. + Downscaling creates the traditional SAR known to ecologists, while + upscaling is useful for estimating large-scale species richness from small- + scale plot data. + + A keyword argument iterative is available (default is False). If True, the + SAR is calculated at successive A values, with the result at each value of + A used as the base values of S and N for the subsequent calculation. The + iterative form was used in is the form used in Harte et al [#]_, although + note that the implementation here uses a different internal equation. + + Methods + ------- + vals(x, S0, N0, iterative=False) + Calculate SAR given starting values and two models. See notes. 
+ + Parameters + ---------- + x : iterable + Areas at which to calculate SAR (first element is A0) S0 : float Species richness at A0 N0 : float Community abundance at A0 - A : iterable - Areas at which to calculate SAR (first element is A0) SAD_model : object Frozen distribution from macroeco.models SSAD_model : object @@ -195,39 +285,110 @@ class gen_sar_gen(curve): """ + def __init__(self, name=None, parameters=None, iterative=False, ear=False): + """ + Provides extra iterative attribute. + """ + self.name = name + self.parameters = parameters + self.n_parameters = len(parameters.split(',')) + self.iterative = iterative + self.ear = ear + def _vals(self, x, S0, N0, iterative=False): # x is area, y is S + if iterative: # Override attribute set by init if passed here + self.iterative = iterative + A0 = x[0] y = [S0] for A in x[1:]: - S1, N1 = self._single_step(S0, N0, A/A0) + a = A/A0 + + if a == 1: + S1, N1 = S0, N0 + elif a < 1: + S1, N1 = self._downscale_step(a, S0, N0) + else: + S1, N1 = self._upscale_step(a, S0, N0) + y.append(S1) - if iterative: + + if self.iterative: S0, N0, A0 = S1, N1, A return np.array(y) - def _single_step(self, S0, N0, a): - # if a < 1, solve, if a > 1, guess and check - if a == 1: - S1 = S0 - N1 = N0 - elif a < 1: # "Normal" downscale - S1 = S0 - N1 = N0 - else: # Upscale solver - S1 = S0 - N1 = N0 + def _downscale_step(self, a, S0, N0, array_size=1e6): + + lower = 1 + upper = array_size + 1 + S = 0 + + while lower < N0: + + if S0 < 1 or np.isnan(S0): # Give up and continue if S0 too small + S = np.nan + lower += array_size + upper += array_size + continue + + if upper > N0: + upper = N0 + + n0 = np.arange(lower, upper) + sad_p, _ = dist.logser_uptrunc.translate_args(N0/S0, N0) + sad = dist.logser_uptrunc.pmf(n0, sad_p, N0) + + if np.isclose(a, 0.5): + ssad_p = 1 / (n0 + 1) + else: + ssad_p, _ = dist.geom_uptrunc.translate_args(a*n0, N0) + + if self.ear: + ssad = dist.geom_uptrunc.pmf(n0, ssad_p, N0) + S += S0 * np.sum(ssad * sad) + else: + ssad = dist.geom_uptrunc.pmf(0, ssad_p, N0) + S += S0 * np.sum((1 - ssad) * sad) - return S1, N1 + lower += array_size + upper += array_size -gen_sar = gen_sar_gen(name='gen_sar', parameters='S0,N0') + return S, N0*a + def _upscale_step(self, a, S0, N0): + raise NotImplementedError, "Upscaling not implemented yet" + + def fit_lsq(self, df): + """ + Parameterize generic SAR curve from empirical data set + + Parameters + ---------- + df : DataFrame + Result data frame from empirical SAR analysis + + Notes + ----- + Method does not use least squares to fit, but rather parameterizes SAD + and SSAD mdoels based on SAR output. Name ``fit_lsq`` is retained for + consistency with other curves. 
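A minimal sketch of how this is meant to be used, assuming sar_df is a hypothetical result DataFrame from the empirical SAR analysis (columns div, x, y, n_spp, and n_individs) whose first row is the largest scale.

    # S0 and N0 are read from the first (largest-scale) row of the result.
    S0, N0 = mete_sar.fit_lsq(sar_df)

    # Predict richness at the same fractional areas analyzed empirically.
    S_pred = mete_sar.vals(sar_df['x'].values, S0, N0)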
+ + """ + # Just return S0 and N0 at largest scale, which is first row of df + return df['n_spp'].values[0], df['n_individs'].values[0] +mete_sar = mete_sar_gen(name='mete_sar', parameters='S0,N0') +mete_iterative_sar = mete_sar_gen(name='mete_iterative_sar', + parameters='S0,N0', iterative=True) +mete_ear = mete_sar_gen(name='mete_ear', parameters='S0,N0', ear=True) +mete_iterative_ear = mete_sar_gen(name='mete_iterative_sar', + parameters='S0,N0', iterative=True, ear=True) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 15a2b4b..85b5b65 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -353,7 +353,7 @@ def p_eq(x, mu, b): return ( (x / (1 - x)) - ((b + 1) / (x**-b - 1)) - mu ) # x here is the param raised to the k_agg power, or 1 - p - return 1 - optim.brentq(p_eq, 1e-9, 20, args=(mu, b), disp=True) + return 1 - optim.brentq(p_eq, 1e-16, 100, args=(mu, b), disp=True) _geom_solve_p_from_mu_vect = np.vectorize(_geom_solve_p_from_mu) @@ -556,7 +556,7 @@ class logser_uptrunc_gen(rv_discrete_meco): r""" Upper truncated logseries random variable - This distribuiton was described by Harte (2011) [#]_ + This distribution was described by Harte (2011) [#]_ .. math:: @@ -688,7 +688,7 @@ def _trunc_logser_solver(bins, b): y = lambda x: np.sum(x ** m / b * bins) - np.sum((x ** m) / m) p = optim.bisect(y, BOUNDS[0] + DIST_FROM_BOUND, min((sys.float_info[0] / bins) ** (1 / b), 2), - xtol=1.490116e-08) + xtol=1.490116e-08, maxiter=1000) return p # From dd1182f51ef68ba5b766c4e1f6e6675871d40012 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 11 Apr 2014 11:57:05 -0700 Subject: [PATCH 222/343] Clean up plot sorting in main --- macroeco/main/main.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 282acec..60f980c 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -514,24 +514,24 @@ def _write_comparison_plot_table(spid, models, options, core_results, # TODO: Clean up sorting, may not work if SAR x out of order, e.g. 
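The four singletons instantiated earlier in this patch (mete_sar, mete_iterative_sar, mete_ear, mete_iterative_ear) differ only in their iterative and ear flags and share the vals(x, S0, N0) calling convention, where x is an iterable of areas whose first element is A0. A hedged usage sketch; the plot values are illustrative, and only downscaling (areas below A0) will run since _upscale_step is not yet implemented:

    from macroeco.models import mete_sar, mete_iterative_sar

    # Predicted S at 1, 1/2, and 1/4 of a plot holding 50 species and 2000
    # individuals; the iterative curve re-anchors S and N at each new area
    areas = [1.0, 0.5, 0.25]
    mete_sar.vals(areas, 50, 2000)
    mete_iterative_sar.vals(areas, 50, 2000)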
is_curve = 'x' in core_results[0][1] - core_result = core_results[spid][1] - n_vals = len(core_result) - - # Set x (given or rank) and y in df - if is_curve: - df = core_result.sort(columns='x') - else: - x = np.arange(n_vals) + 1 - df = core_result.sort(columns='y', ascending=False) - df.insert(0, 'x', x) + df = core_results[spid][1] df.rename(columns={'y': 'empirical'}, inplace=True) + # If distribution, need to sort values so will match sorted rank in fits + if not is_curve: + x = np.arange(len(df)) + 1 + df = df.sort(columns='empirical') + df.insert(0, 'x', x[::-1]) + # Add residual column for each model for model in models: fit_result = fit_results[spid][model] - result = fit_result[1]['y'].values[::-1] - df[model] = result - df[model + "_residual"] = result - df['empirical'] + df[model] = fit_result[1] + df[model + "_residual"] = df[model] - df['empirical'] + + # If curve, sort now for plotting purposes + if is_curve: + df = df.sort(columns='x') # Set up file paths f_path = _get_file_path(spid, options, 'data_models.csv') From 4cb0e696fe71b5e2070afbc35c8860ab72776578 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 11 Apr 2014 11:57:27 -0700 Subject: [PATCH 223/343] Make curves return simple array from vals, not dataframe --- macroeco/main/main.py | 2 +- macroeco/models/_curves.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 60f980c..87a906b 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -375,7 +375,7 @@ def _get_comparison_stat(core_result, values, model, fits): try: # Only curves have vals eval("mod.%s" % model + ".vals.__doc__") obs = core_result[1]['y'].values - pred = values['y'].values + pred = values name = ['R2'] stat = comp.r_squared(obs, pred, one_to_one=True) except AttributeError: diff --git a/macroeco/models/_curves.py b/macroeco/models/_curves.py index 8cad480..b416265 100644 --- a/macroeco/models/_curves.py +++ b/macroeco/models/_curves.py @@ -49,8 +49,7 @@ def vals(self, x, *args, **kwargs): """ x = np.array(x) - y = self._vals(x, *args, **kwargs) - return pd.DataFrame({'x': x, 'y': y}) + return self._vals(x, *args, **kwargs) def _vals(self, x, *args): """ @@ -97,7 +96,7 @@ def fit_lsq(self, x, y_obs, params_start=None): # Calculate fit def residuals(params, x, y_obs): - y_pred = self.vals(x, *params)['y'] + y_pred = self.vals(x, *params) return y_obs - y_pred params_fit, _, _, msg, ier = optimize.leastsq(residuals, params_start, From 77de9831be2849d9dab58145b39f27c0687e60c1 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 11 Apr 2014 11:57:44 -0700 Subject: [PATCH 224/343] Use simulated log scale for residual plots if log_y requested --- macroeco/main/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 87a906b..b76c1b9 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -559,6 +559,7 @@ def _write_comparison_plot_table(spid, models, options, core_results, if options.get('log_y', None): ax1.set_yscale('log') + ax2.set_yscale('symlog', linthreshy=1) if options.get('log_x', None): ax1.set_xscale('log') ax2.set_xscale('log') From 959fd9e085848d3f929d0cc4ae795636a32f5283 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 11 Apr 2014 14:11:26 -0700 Subject: [PATCH 225/343] Code review compare --- macroeco/compare/__init__.py | 4 +- macroeco/compare/compare.py | 154 +++++++++++++++---------------- macroeco/compare/test_compare.py | 95 +++++-------------- 3 files changed, 100 
insertions(+), 153 deletions(-) diff --git a/macroeco/compare/__init__.py b/macroeco/compare/__init__.py index e3dc66a..5afed57 100644 --- a/macroeco/compare/__init__.py +++ b/macroeco/compare/__init__.py @@ -23,6 +23,6 @@ """ -from .compare import (nll, lrt, AIC, AIC_weights, +from .compare import (nll, lrt, AIC, AIC_compare, sum_of_squares, r_squared, - bin_data) + preston_bin) diff --git a/macroeco/compare/compare.py b/macroeco/compare/compare.py index a182b0c..d2a9921 100644 --- a/macroeco/compare/compare.py +++ b/macroeco/compare/compare.py @@ -8,14 +8,13 @@ from ..misc import doc_sub _data_doc = \ - """data : array-like - data from which to caculate the the likelihood - """ + """data : iterable + Data for analysis""" _model_doc = \ - """model : frozen distribution object A frozen scipy model object. When - freezing, keyword args ``loc`` and ``scale`` should only be included if - they represent a distribution parameter. + """model : obj + Scipy frozen distribution object. When freezing, keyword args ``loc`` + and ``scale`` should only be included if they represent a parameter. """ _obs_pred_doc = \ @@ -27,7 +26,7 @@ @doc_sub(_data_doc, _model_doc) def nll(data, model): """ - Calculate the neagtive log likelihood given data and a model + Negative log likelihood given data and a model Parameters ---------- @@ -51,25 +50,19 @@ def nll(data, model): @doc_sub(_data_doc) def lrt(data, model_null, model_alt, df=None): """ - This functions compares two nested models using the likelihood ratio - test. + Compare two nested models using a likelihood ratio test Parameters ---------- {0} - model_null : scipy distribution object - The null model as a frozen scipy distribution object. Parameters of - distribution must be given as keyword arguments. - Ex. ``norm = stats.norm(loc=0, scale=1)`` - + model_null : obj + A frozen scipy distribution object representing the null model. model_alt : scipy distribution object - The alternative model as a a frozen scipy distribution object. - + A frozen scipy distribution object representing the alternative model. df : int - Optional. Specify the degrees of freedom for the lrt. Calculated - as the number of parameters in model_alt - number of parameters in - model_null. If None, the df is calculated from the model - objects. + The degrees of freedom for the lrt (optional). If none, df is + calculated as the difference between the number of parameters in the + null and alternative models. Returns ------- @@ -78,14 +71,15 @@ def lrt(data, model_null, model_alt, df=None): Notes ----- + Parameters of distribution objects must be given as keyword arguments. Ex. + ``norm = stats.norm(loc=0, scale=1)`` - Interpretation: p-value < alpha suggests signficant evidence for your - alternative model + A p-value < alpha suggests signficant evidence for the alternative model. - The LRT only applies to nested models. The variable test_stat is known as - the G^2 statistic. The G-test uses the fact that -2log(Likelihood_null / - Likelihood_alt) is approximately chi-squared. This assumption breaks down - for small samples sizes. + The LRT only applies to nested models. The G^2 statistic and G-test rely on + the the assumption that -2log(Likelihood_null / Likelihood_alt) is + approximately chi-squared distributed. This assumption breaks down for + small samples sizes. 
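These docstring revisions settle the calling convention for the comparison functions: raw data plus a frozen scipy distribution. A hedged usage sketch, with the expected values taken from the unit tests later in this patch:

    import numpy as np
    import scipy.stats as stats
    from macroeco.compare import nll, AIC, AIC_compare

    data = np.array([1, 2, 3, 4, 5])
    nll(data, stats.norm(loc=0, scale=1))   # ~32.0947, matching the R-derived value in TestNLL below

    abund = [1, 1, 1, 2, 3, 4, 7, 23, 78]
    aics = [AIC(abund, m) for m in (stats.norm(scale=100), stats.norm(scale=99))]
    delta, weights = AIC_compare(aics)      # weights ~ (0.479, 0.521); lower AIC gets more weight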
""" @@ -105,19 +99,17 @@ def lrt(data, model_null, model_alt, df=None): @doc_sub(_data_doc, _model_doc) def AIC(data, model, params=None, corrected=True): """ - Calculate AIC given values of a model given data and model parameters + Akaike Information Criteria given data and a model Parameters ---------- {0} {1} params : int - The number of parameters in the model. If None, calculates the number - of parameters from the distribution object - + Number of parameters in the model. If None, calculated from model + object. corrected : bool - If True, calculates the corrected AICC, if False calculates the - uncorrected AIC. + If True, calculates the small-sample size correct AICC. Default False. Returns ------- @@ -131,9 +123,9 @@ def AIC(data, model, params=None, corrected=True): References ---------- .. [#] - Burnham, K and Anderson, D. (2002) Model Selection and Multimodel - Inference: A Practical and Information-Theoretic Approach (p. 66). New - York City, USA: Springer. + Burnham, K and Anderson, D. (2002) Model Selection and Multimodel + Inference: A Practical and Information-Theoretic Approach (p. 66). New + York City, USA: Springer. """ n = len(data) # Number of observations @@ -152,25 +144,26 @@ def AIC(data, model, params=None, corrected=True): return aic_value -def AIC_weights(aic_list): +def AIC_compare(aic_list): """ - Calculates the AIC weights for a given set of models. + Calculates delta AIC and AIC weights from a list of AIC values Parameters ----------------- - aic_list : array-like object - Array-like object containing AIC values from different models + aic_list : iterable + AIC values from set of candidat models Returns ------------- tuple - First element contains the relative AIC weights, second element - contains the delta AIC values. + First element contains the delta AIC values, second element contains + the relative AIC weights. Notes ----- AIC weights can be interpreted as the probability that a given model is the - best model in comparison to the other models + best model in the set. + """ aic_values = np.array(aic_list) @@ -179,42 +172,50 @@ def AIC_weights(aic_list): values = np.exp(-delta / 2) weights = values / np.sum(values) - return weights, delta + return delta, weights -@doc_sub(_obs_pred_doc) def sum_of_squares(obs, pred): """ - Calculates the sum of squares between observed (X) and predicted (Y) data. - Attempts to braodcast arrays if lengths don't match. + Sum of squares between observed and predicted data Parameters ---------- - {0} + obs : iterable + Observed data + pred : iterable + Predicted data + Returns ------- float Sum of squares + + Notes + ----- + The length of observed and predicted data must match. + """ - #obs, pred = tuple(np.broadcast_arrays(obs, pred)) + return np.sum((np.array(obs) - np.array(pred)) ** 2) -@doc_sub(_obs_pred_doc) def r_squared(obs, pred, one_to_one=False, log_trans=False): """ - Get's the R^2 value for a regression of observed (X) and predicted (Y) - data + R^2 value for a regression of observed and predicted data Parameters ---------- - {0} + obs : iterable + Observed data + pred : iterable + Predicted data one_to_one : bool - If True, calculates the R^2 based on the one-to-one line as done in - [#]_. If False, calculates the standard R^2 from a regression fit. - + If True, calculates the R^2 based on the one-to-one line (see [#]_), + and if False, calculates the standard R^2 based on a linear regression. + Default False. log_trans : bool - If True, log transforms obs and pred. 
+ If True, log transforms obs and pred before R^2 calculation. Returns ------- @@ -223,64 +224,61 @@ def r_squared(obs, pred, one_to_one=False, log_trans=False): Notes ----- - Using just R^2 to compare the fit of observed and predicted values can be - misleading because the relationship may not be one-to-one but the R^2 - value may be quite high. The one-to-one option alleviates this problem. + Using the traditional R^2 to compare the fit of observed and predicted + values may be misleading as the relationship may not be one-to-one but the + R^2 value may be quite high. The one-to-one option alleviates this problem. References ---------- .. [#] - White, E., Thibault, K., & Xiao, X. (2012). Characterizing the species - abundance distributions across taxa and ecosystems using a simple - maximum entropy model. Ecology, 93(8), 1772-8 + White, E., Thibault, K., & Xiao, X. (2012). Characterizing the species + abundance distributions across taxa and ecosystems using a simple + maximum entropy model. Ecology, 93(8), 1772-8 """ - # Sort obs and pred - obs = np.sort(obs) - pred = np.sort(pred) - if log_trans: obs = np.log(obs) pred = np.log(pred) if one_to_one: - # Equation from White et al 2012 - r_sq = 1 - sum_of_squares(obs, pred) / \ - sum_of_squares(obs, np.mean(obs)) + r_sq = 1 - (sum_of_squares(obs, pred) / + sum_of_squares(obs, np.mean(obs))) else: b0, b1, r, p_value, se = stats.linregress(obs, pred) r_sq = r ** 2 return r_sq - -def bin_data(data, max_num): +def preston_bin(data, max_num): """ - Bins the data on base 2. Uses Preston's method of binning which has - exclusive lower boundaries and inclusive upper boundaries. Densities are - not split between bins. + Bins data on base 2 using Preston's method Parameters ---------- data : array-like Data to be binned - max_num : float - The maximum upper most boundary of the data + The maximum upper value of the data Returns ------- tuple (binned_data, bin_edges) + Notes + ----- + Uses Preston's method of binning, which has exclusive lower boundaries and + inclusive upper boundaries. Densities are not split between bins. + References ---------- .. [#] - Preston, F. (1962). The canonical distribution of commonness and rarity. - Ecology, 43, 185-215 + Preston, F. (1962). The canonical distribution of commonness and rarity. 
+ Ecology, 43, 185-215 """ + log_ub = np.ceil(np.log2(max_num)) # Make an exclusive lower bound in keeping with Preston diff --git a/macroeco/compare/test_compare.py b/macroeco/compare/test_compare.py index 1531cf5..82c1a9a 100644 --- a/macroeco/compare/test_compare.py +++ b/macroeco/compare/test_compare.py @@ -16,11 +16,9 @@ class TestNLL(TestCase): - '''Test NLL in compare''' def test_nll(self): - - # Test against R result: sum(dnorm(c(1,2,3,4,5), log=TRUE)) + # R: sum(dnorm(c(1,2,3,4,5), log=TRUE)) R_res = 32.09469 data = np.array([1, 2, 3, 4, 5]) model = stats.norm(loc=0, scale=1) @@ -28,65 +26,54 @@ def test_nll(self): assert_almost_equal(R_res, lglk, decimal=5) +# TODO: Test LRT + + class TestAIC(TestCase): - """Test AIC function""" def test_aic_basic(self): - """Testing basic functionality of AIC""" - # Test case 1 model = stats.norm(loc=0, scale=1) data = np.arange(1, 9) aic1 = AIC(data, model, corrected=False) expected = 222.703016531 # Calculated by hand - assert_almost_equal(aic1, expected) + assert_almost_equal(aic1, expected, decimal=6) - # Test case 2 model = stats.gamma(a=2) data = [1, 1, 1, 2, 4, 5, 7, 12] aic1 = AIC(data, model, corrected=False) - expected = 51.760607494 + expected = 51.760607494 # Calculated by hand assert_almost_equal(aic1, expected, decimal=6) - # Test case 3 model = stats.gamma(a=2, loc=0) aic1 = AIC(data, model, corrected=False) - expected = 53.760607494 + expected = 53.760607494 # Calculated by hand assert_almost_equal(aic1, expected, decimal=6) def test_aic_given_params(self): - """ Test AIC if params are given """ - # Test case 1 model = stats.norm() data = np.arange(1, 9) aic1 = AIC(data, model, corrected=False, params=2) - # statsmodel.tools.eval_measures.aic: aic(L, 8, 2) expected = 222.703016531 assert_almost_equal(aic1, expected) - # Test case 2 model = stats.gamma(2) data = [1, 1, 1, 2, 4, 5, 7, 12] aic1 = AIC(data, model, corrected=False, params=1) - # statsmodel.tools.eval_measures.aic: aic(L, 8, 1) expected = 51.760607494 assert_almost_equal(aic1, expected, decimal=6) - # Test case 3 model = stats.gamma(2, 0) aic1 = AIC(data, model, corrected=False, params=2) - # statsmodel.tools.eval_measures.aic: aic(L, 8, 2) expected = 53.760607494 assert_almost_equal(aic1, expected, decimal=6) def test_aicc(self): - """ Test AICC gives expected results""" - # Test values model = stats.norm() data = np.arange(1, 9) aic1 = AIC(data, model, corrected=True, params=2) @@ -94,101 +81,63 @@ def test_aicc(self): assert_almost_equal(expected, aic1, decimal=5) -class TestAICWeights(TestCase): +class TestAICCompare(TestCase): - def test_aic_weights(self): + def test_aic_delta_and_weights(self): - # Test values data = [1, 1, 1, 2, 3, 4, 7, 23, 78] models = [stats.norm(scale=100), stats.norm(scale=99)] aic_vals = [AIC(data, tm) for tm in models] + daic, aicw = AIC_compare(aic_vals) - aicw, delta_aic = AIC_weights(aic_vals) - - # Calculated by hand - pred = np.array([0.47909787, 0.52090213]) + pred = np.array([0.47909787, 0.52090213]) # Calculated by hand assert_array_almost_equal(aicw, pred) + assert_array_almost_equal(daic, [daic[0]-daic[1], 0]) class TestRsquared(TestCase): - def test_basic_r_squared(self): - - # Already unittested in scipy. 
Checking for functionaliity - test_data = np.random.randint(5, 100, 100) - rsq = r_squared(test_data, test_data) - assert_equal(rsq, 1) - - def test_one_to_one_rsq(self): + def test_r_squared_repeated_data(self): # Identical data should lead to an R^2 of 1 test_data = np.random.randint(5, 100, 100) - rsq = r_squared(test_data, test_data, one_to_one=True) + rsq = r_squared(test_data, test_data) assert_equal(rsq, 1) - # Test against R^2 from fixed slope linear regression in R - # Calculate by hand? + # TODO: Test known R2 for regression and one-to-one -class TestBinData(TestCase): +class TestPrestonBin(TestCase): - def test_bin_data_functionality(self): + def test_bin_functionality(self): # Test against R's vegan prestonfit: prestonfit(data, tiesplit=FALSE) # Note that vegan drops the bins with 0 values data = np.array([1, 1, 1, 1, 2, 2, 4, 4, 8, 16, 17.1, 89]) vegan = np.array([4, 2, 2, 1, 1, 1, 0, 1], dtype=np.float) - test_res = bin_data(data, max(data))[0] + test_res = preston_bin(data, max(data))[0] assert_array_equal(test_res, vegan) data = np.array([1, 1, 1, 1, 4, 5, 6, 7, 12, 34, 56]) vegan = np.array([4, 0, 1, 3, 1, 0, 2], dtype=np.float) - test_res = bin_data(data, max(data))[0] + test_res = preston_bin(data, max(data))[0] assert_array_equal(test_res, vegan) def test_bin_data_boundary(self): # Test boundary condition data = np.array([1, 2]) vegan = np.array([1, 1], dtype=np.float) - test_res = bin_data(data, max(data))[0] + test_res = preston_bin(data, max(data))[0] assert_array_equal(test_res, vegan) data = np.array([1, 1, 1]) vegan = np.array([3], dtype=np.float) - test_res = bin_data(data, max(data))[0] + test_res = preston_bin(data, max(data))[0] assert_array_equal(test_res, vegan) data = np.array([1, 2, 3]) vegan = np.array([1, 1, 1], dtype=np.float) - test_res = bin_data(data, max(data))[0] + test_res = preston_bin(data, max(data))[0] assert_array_equal(test_res, vegan) - # def test_lrt(self): - - # # Test against what the lrtest() R function returns - # model1 = 158.0494 - # model0 = 139.806 - # R_chisquare = 36.4868 - # R_p = 1.537e-09 - - # pred_chi, pred_p = lrt(model1, model0, 1) - - # assert_almost_equal(pred_chi, R_chisquare) - # assert_almost_equal(pred_p, R_p) - - # def test_empirical_cdf(self): - - # #Test against R's ecdf function - - # # Test Case 1 - # test_data = [1, 1, 1, 1, 2, 3, 4, 5, 6, 6] - # R_res = [.4, .4, .4, .4, .5, .6, .7, .8, 1, 1] - # res = empirical_cdf(test_data) - # assert_array_equal(R_res, res) - - # # Test Case 2 - # test_data = [3, 3, 3, 3] - # R_res = [1, 1, 1, 1] - # res = empirical_cdf(test_data) - # assert_array_equal(R_res, res) From 993e7ee17fd43be5c7788dfdafa56db5270c3979 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 11 Apr 2014 14:14:08 -0700 Subject: [PATCH 226/343] Correct relative import syntax --- macroeco/models/__init__.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py index 64a83ea..7860e54 100644 --- a/macroeco/models/__init__.py +++ b/macroeco/models/__init__.py @@ -33,13 +33,11 @@ cnbinom logser_uptrunc -.. DV: - Our public-facing distributions do not use location and scale parameters, as - they are not common in quantitative ecology. 
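The TestPrestonBin cases above pin preston_bin to vegan's prestonfit(..., tiesplit=FALSE). A short usage sketch with the expected counts copied from those tests:

    import numpy as np
    from macroeco.compare import preston_bin

    abund = np.array([1, 1, 1, 1, 2, 2, 4, 4, 8, 16, 17.1, 89])
    counts, edges = preston_bin(abund, max(abund))
    # counts -> [4, 2, 2, 1, 1, 1, 0, 1]: base-2 bins with exclusive lower
    # and inclusive upper boundaries, zero-count bins retained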
""" -from _distributions import (geom, geom_uptrunc, nbinom, cnbinom, - logser_uptrunc, expon, expon_uptrunc) +from ._distributions import (geom, geom_uptrunc, nbinom, cnbinom, + logser_uptrunc, + expon, expon_uptrunc) from ._curves import (power_law, mete_sar, mete_iterative_sar, From 6bcffc0b9e8ff86afdf4f6dff0e8f1adf54b427e Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 11 Apr 2014 14:14:59 -0700 Subject: [PATCH 227/343] Rename compare to _compare to hide file --- macroeco/compare/__init__.py | 6 +++--- macroeco/compare/{compare.py => _compare.py} | 0 2 files changed, 3 insertions(+), 3 deletions(-) rename macroeco/compare/{compare.py => _compare.py} (100%) diff --git a/macroeco/compare/__init__.py b/macroeco/compare/__init__.py index 5afed57..0375f1e 100644 --- a/macroeco/compare/__init__.py +++ b/macroeco/compare/__init__.py @@ -23,6 +23,6 @@ """ -from .compare import (nll, lrt, AIC, AIC_compare, - sum_of_squares, r_squared, - preston_bin) +from ._compare import (nll, lrt, AIC, AIC_compare, + sum_of_squares, r_squared, + preston_bin) diff --git a/macroeco/compare/compare.py b/macroeco/compare/_compare.py similarity index 100% rename from macroeco/compare/compare.py rename to macroeco/compare/_compare.py From 99a0525bb80311dc258f48f5133f61b472507937 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 11 Apr 2014 14:17:45 -0700 Subject: [PATCH 228/343] Rename empirical to _empirical --- macroeco/empirical/__init__.py | 15 +++++++++++---- .../empirical/{empirical.py => _empirical.py} | 0 2 files changed, 11 insertions(+), 4 deletions(-) rename macroeco/empirical/{empirical.py => _empirical.py} (100%) diff --git a/macroeco/empirical/__init__.py b/macroeco/empirical/__init__.py index 48c7070..c899a92 100644 --- a/macroeco/empirical/__init__.py +++ b/macroeco/empirical/__init__.py @@ -25,11 +25,18 @@ sad ssad sar - ear comm_grid +Other +===== + +.. 
autosummary:: + :toctree: generated/ + + empirical_cdf + """ -from .empirical import (Patch, - sad, ssad, sar, comm_grid, - empirical_cdf) +from ._empirical import (Patch, + sad, ssad, sar, comm_grid, + empirical_cdf) diff --git a/macroeco/empirical/empirical.py b/macroeco/empirical/_empirical.py similarity index 100% rename from macroeco/empirical/empirical.py rename to macroeco/empirical/_empirical.py From f1292285ac20d4a07046697c3b81bf1cf33b396d Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 11 Apr 2014 20:28:41 -0700 Subject: [PATCH 229/343] Clean up tops of test modules --- macroeco/compare/test_compare.py | 6 ------ macroeco/models/test_distributions.py | 5 ----- 2 files changed, 11 deletions(-) diff --git a/macroeco/compare/test_compare.py b/macroeco/compare/test_compare.py index 82c1a9a..97b32cc 100644 --- a/macroeco/compare/test_compare.py +++ b/macroeco/compare/test_compare.py @@ -1,8 +1,3 @@ -#!/usr/bin/python -""" -Tests for compare module - -""" from __future__ import division from numpy.testing import (TestCase, assert_equal, assert_array_equal, @@ -12,7 +7,6 @@ from macroeco.compare import * import numpy as np import scipy.stats as stats -import numpy.testing as nt class TestNLL(TestCase): diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index af98128..bad5eef 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -1,8 +1,3 @@ -""" -Tests for distributions2 module - -""" - from __future__ import division from numpy.testing import (TestCase, assert_equal, assert_array_equal, From 57b7dc2966c0f45ae8935d9b96791afd76e2c60a Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 11 Apr 2014 20:28:53 -0700 Subject: [PATCH 230/343] Whitespace cleanup --- macroeco/models/test_distributions.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index bad5eef..d6556f4 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -11,11 +11,12 @@ import matplotlib.pyplot as plt import scipy.stats as stats + class TestGeom(TestCase): def test_pmf(self): - vals = geom.pmf([0,1,2], 0.5) - assert_array_almost_equal(vals, [0.5,0.25,0.125]) + vals = geom_uptrunc.pmf([0,1,2], 0.25, 2) + assert_array_almost_equal(vals, np.array([0.25, 0.1875, 0.140625])) def test_mean(self): mu1 = geom.mean(0.5) @@ -43,17 +44,17 @@ def test_pmf(self): # Expected values are regular geo cdf divided by cdf at b vals = geom_uptrunc.pmf([0,1,2], 0.25, 2) assert_array_almost_equal(vals, - np.array([0.25,0.1875,0.140625])/0.578125) + np.array([0.25,0.1875,0.140625]) / 0.578125) def test_cdf(self): # Expected values are regular geom cdf divided by cdf at b vals = geom_uptrunc.cdf([0,1,2], 0.5, 2) - assert_array_almost_equal(vals, np.array([0.5,0.75,0.875])/0.875) + assert_array_almost_equal(vals, np.array([0.5,0.75,0.875]) / 0.875) def test_cdf_x_len_1(self): # cdf should be not throw error even if x is len 1 vals = geom_uptrunc.cdf(0, 0.5, 2) - assert_almost_equal(vals, 0.5/0.875) + assert_almost_equal(vals, 0.5 / 0.875) def test_mean(self): mu1 = geom_uptrunc.mean(0.801, 32) From e3f8fe5e3f1aebebbbf4ac0096864f3f9a62696d Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 11 Apr 2014 20:31:45 -0700 Subject: [PATCH 231/343] Refactor _do_format to shorten --- macroeco/main/main.py | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git 
a/macroeco/main/main.py b/macroeco/main/main.py index b76c1b9..81a5f08 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -98,38 +98,13 @@ def _get_params_base_options(param_path): def _do_format(options): - """ - Notes - ----- - All format functions take the same parameters: original csv path, output - csv path, and keyword arguments. - - """ - analysis_name = options['analysis'] datapath = os.path.normpath(os.path.join(options['param_dir'], options['data'])) - out_path = os.path.splitext(datapath)[0] + "_formatted.csv" - if analysis_name == 'format_dense': - - misc.data_read_write(datapath, out_path, "dense", **options) - - elif analysis_name == 'format_stacked': - - misc.data_read_write(datapath, out_path, "stacked", **options) - - elif analysis_name == 'format_grid': - - misc.data_read_write(datapath, out_path, "grid", **options) - - elif analysis_name == 'format_transect': - - misc.data_read_write(datapath, out_path, "transect", **options) - - else: - raise NameError("Cannot format data using analysis %s" % analysis_name) + format_type = options['analysis'].split('_')[1] + misc.data_read_write(datapath, out_path, format_type, **options) def _do_analysis(options): From 7063739b61da55746eb965e64de2f17f1d0f353d Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 12 Apr 2014 16:51:48 -0700 Subject: [PATCH 232/343] Clean up whitespace, order, formatting, documentation --- macroeco/empirical/_empirical.py | 278 ++++++++++++++++++------------- 1 file changed, 159 insertions(+), 119 deletions(-) diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py index 4b43f9a..6d29d8d 100644 --- a/macroeco/empirical/_empirical.py +++ b/macroeco/empirical/_empirical.py @@ -76,6 +76,7 @@ and across eight subplots when the patch is split into 2 parts along x_col and 4 parts along y_col.""" + class Patch(object): """ An object representing an empirical census @@ -103,11 +104,6 @@ class Patch(object): consisting only of letters and numbers, with no spaces or other special characters. - The meta attribute of this object is processed to reflect the value of - subset. If columns with a min and a max are included in the subset string, - the min and max values for that column in meta will be updated to reflect - the specified limits. - The parameter subset takes different forms depending on whether the data file described by the metadata is a csv or a sql/db file. @@ -121,6 +117,13 @@ class Patch(object): For sql/db files, subset is a SQL query string that selects the data from the data file. + The meta attribute of this object is processed to reflect the value of + subset. If columns with a min and a max are included in the subset string, + the min and max values for that column in meta will be updated to reflect + the specified limits. + + An empty Patch object can be created with a metadata_path of None. + """ def __init__(self, metadata_path, subset=''): @@ -189,6 +192,7 @@ def _get_db_table(self, data_path, extension): """ # TODO: This is probably broken + raise NotImplementedError, "SQL and db file formats not yet supported" # Load table if extension == 'sql': @@ -234,7 +238,7 @@ def _get_db_table(self, data_path, extension): def _subset_table(full_table, subset): """ - Return subtable matching all conditions in subset. 
+ Return subtable matching all conditions in subset Parameters ---------- @@ -252,19 +256,19 @@ def _subset_table(full_table, subset): if not subset: return full_table - # TODO: Figure out in syntax for logical or + # TODO: Figure out syntax for logical or conditions = subset.replace(' ','').split(';') valid = np.ones(len(full_table), dtype=bool) for condition in conditions: - this_valid = eval('full_table.'+condition) + this_valid = eval('full_table.' + condition) valid = np.logical_and(valid, this_valid) return full_table[valid] def _subset_meta(full_meta, subset): """ - Return subset of metadata matching all conditions in subset. + Return metadata reflecting all conditions in subset Parameters ---------- @@ -275,7 +279,7 @@ def _subset_meta(full_meta, subset): Returns ------- - ConfigParser obj + Configparser object or dict Updated version of full_meta accounting for subset string """ @@ -323,8 +327,7 @@ def sad(patch, cols, splits, clean=True): ---------- {0} clean : bool - If True, all species with zero abundance are removed from SAD results - (relevant if splits is used and some splits are missing species). + If True, all species with zero abundance are removed from SAD results. Default False. Returns @@ -643,109 +646,6 @@ def _yield_spatial_table(patch, div, spp_col, count_col, x_col, y_col): -def comm_sep(self, plot_locs, criteria, loc_unit=None): - ''' - Calculates commonality (Sorensen and Jaccard) between pairs of plots. - - Parameters - ---------- - plot_locs : dict - Dictionary with keys equal to each plot name, which must be - represented by a column in the data table, and values equal to a - tuple of the x and y coordinate of each plot - criteria : dict - See docstring for Patch.sad. - loc_unit : str - Unit of plot locations. Special cases include 'decdeg' (decimal - degrees), returns result in km. Otherwise ignored. - - Returns - ------- - result: structured array - Returns a structured array with fields plot-a and plot-b (names of - two plots), dist (distance between plots), and sorensen and jaccard - (similarity indices). Has row for each unique pair of plots. 
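The cols and splits strings handled above are easiest to read with a concrete call. A hedged sketch reusing the fixture metadata and column string from test_empirical.py later in this patch series:

    import macroeco.empirical as emp

    pat = emp.Patch('test_meta1.txt')   # test fixture; a subset string is optional
    cols = 'spp_col:spp; count_col:count; x_col:x; y_col:y'

    # Splitting into 2 x-strips and 3 y-strips yields one result per subplot
    # (6 here), each holding a DataFrame of species ('spp') and abundance ('y')
    emp.sad(pat, cols, 'x:2; y:3')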
- ''' - - # Set up sad_dict with key=plot and val=clean sad for that plot - sad_dict = {} - - # Loop through all plot cols, updating criteria, and getting spp_list - for plot in plot_locs.keys(): - - # Find current count col and remove it from criteria - for crit_key in criteria.keys(): - if criteria[crit_key] == 'count': - criteria.pop(crit_key, None) - - # Add this plot as col with counts - criteria[plot] = 'count' - - # Get SAD for existing criteria with this plot as count col - sad_return = self.sad(criteria, clean=True) - - # Check that sad_return only has one element, or throw error - if len(sad_return) > 1: - raise NotImplementedError('Too many criteria for comm_sep') - - # Get unique species list for this plot and store in sad_dict - sad_dict[plot] = sad_return[0][2] - - # Set up recarray to hold Sorensen index for all pairs of plots - n_pairs = np.sum(np.arange(len(plot_locs.keys()))) - result = np.recarray((n_pairs,), dtype=[('plot-a','S32'), - ('plot-b', 'S32'), - ('spp-a', int), - ('spp-b', int), - ('dist', float), - ('sorensen', float), - ('jaccard', float)]) - - # Loop through all combinations of plots and fill in result table - row = 0 - for pair in itertools.combinations(plot_locs.keys(), 2): - - # Names of plots - plota = pair[0] - plotb = pair[1] - - result[row]['plot-a'] = plota - result[row]['plot-b'] = plotb - - # Calculate inter-plot distance - if loc_unit == 'decdeg': - result[row]['dist'] = decdeg_distance(plot_locs[plota], - plot_locs[plotb]) - else: - result[row]['dist'] = distance(plot_locs[plota], - plot_locs[plotb]) - - # Get similarity indices - spp_a = len(sad_dict[plota]) - spp_b = len(sad_dict[plotb]) - - result[row]['spp-a'] = spp_a - result[row]['spp-b'] = spp_b - - intersect = set(sad_dict[plota]).intersection(sad_dict[plotb]) - union = set(sad_dict[plota]).union(sad_dict[plotb]) - - # Fill in zero if denom is zero - if spp_a + spp_b == 0: - result[row]['sorensen'] = 0 - else: - result[row]['sorensen'] = (2*len(intersect)) / (spp_a+spp_b) - - if len(union) == 0: - result[row]['jaccard'] = 0 - else: - result[row]['jaccard'] = len(intersect) / len(union) - - # Increment row counter - row += 1 - - return result - def o_ring(self, div_cols, bin_edges, criteria, n0_min_max=None, edge_correct=False, density=False): ''' @@ -944,6 +844,115 @@ def o_ring(self, div_cols, bin_edges, criteria, n0_min_max=None, return result_list + + +def comm_sep(self, plot_locs, criteria, loc_unit=None): + ''' + Calculates commonality (Sorensen and Jaccard) between pairs of plots. + + Parameters + ---------- + plot_locs : dict + Dictionary with keys equal to each plot name, which must be + represented by a column in the data table, and values equal to a + tuple of the x and y coordinate of each plot + criteria : dict + See docstring for Patch.sad. + loc_unit : str + Unit of plot locations. Special cases include 'decdeg' (decimal + degrees), returns result in km. Otherwise ignored. + + Returns + ------- + result: structured array + Returns a structured array with fields plot-a and plot-b (names of + two plots), dist (distance between plots), and sorensen and jaccard + (similarity indices). Has row for each unique pair of plots. 
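comm_sep, re-added just below, reduces each pair of plots to Sorensen and Jaccard indices over their species lists. A minimal numeric check of the two formulas used in its body (species codes hypothetical):

    from __future__ import division

    spp_a = {'arte', 'bout'}
    spp_b = {'arte', 'bout', 'crypt'}

    shared = len(spp_a & spp_b)                          # 2
    sorensen = 2 * shared / (len(spp_a) + len(spp_b))    # 4/5 = 0.8
    jaccard = shared / len(spp_a | spp_b)                # 2/3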
+ ''' + + # Set up sad_dict with key=plot and val=clean sad for that plot + sad_dict = {} + + # Loop through all plot cols, updating criteria, and getting spp_list + for plot in plot_locs.keys(): + + # Find current count col and remove it from criteria + for crit_key in criteria.keys(): + if criteria[crit_key] == 'count': + criteria.pop(crit_key, None) + + # Add this plot as col with counts + criteria[plot] = 'count' + + # Get SAD for existing criteria with this plot as count col + sad_return = self.sad(criteria, clean=True) + + # Check that sad_return only has one element, or throw error + if len(sad_return) > 1: + raise NotImplementedError('Too many criteria for comm_sep') + + # Get unique species list for this plot and store in sad_dict + sad_dict[plot] = sad_return[0][2] + + # Set up recarray to hold Sorensen index for all pairs of plots + n_pairs = np.sum(np.arange(len(plot_locs.keys()))) + result = np.recarray((n_pairs,), dtype=[('plot-a','S32'), + ('plot-b', 'S32'), + ('spp-a', int), + ('spp-b', int), + ('dist', float), + ('sorensen', float), + ('jaccard', float)]) + + # Loop through all combinations of plots and fill in result table + row = 0 + for pair in itertools.combinations(plot_locs.keys(), 2): + + # Names of plots + plota = pair[0] + plotb = pair[1] + + result[row]['plot-a'] = plota + result[row]['plot-b'] = plotb + + # Calculate inter-plot distance + if loc_unit == 'decdeg': + result[row]['dist'] = _decdeg_distance(plot_locs[plota], + plot_locs[plotb]) + else: + result[row]['dist'] = _distance(plot_locs[plota], + plot_locs[plotb]) + + # Get similarity indices + spp_a = len(sad_dict[plota]) + spp_b = len(sad_dict[plotb]) + + result[row]['spp-a'] = spp_a + result[row]['spp-b'] = spp_b + + intersect = set(sad_dict[plota]).intersection(sad_dict[plotb]) + union = set(sad_dict[plota]).union(sad_dict[plotb]) + + # Fill in zero if denom is zero + if spp_a + spp_b == 0: + result[row]['sorensen'] = 0 + else: + result[row]['sorensen'] = (2*len(intersect)) / (spp_a+spp_b) + + if len(union) == 0: + result[row]['jaccard'] = 0 + else: + result[row]['jaccard'] = len(intersect) / len(union) + + # Increment row counter + row += 1 + + return result + + + + + def ied(self, criteria, normalize=True, exponent=0.75): ''' Calculates the individual energy distribution for the entire community @@ -1335,19 +1344,50 @@ def _product(*args, **kwds): return result +def _distance(pt1, pt2): + """Euclidean distance between two points""" + return np.sqrt((pt1[0] - pt2[0]) ** 2 + (pt1[1] - pt2[1]) ** 2) + + +def _decdeg_distance(pt1, pt2): + """ + Earth surface distance (in km) between decimal latlong points using + Haversine approximation. + + http://stackoverflow.com/questions/15736995/ + how-can-i-quickly-estimate-the-distance-between-two-latitude-longitude- + points + """ + + lat1, lon1 = pt1 + lat2, lon2 = pt2 + + # Convert decimal degrees to radians + lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2]) + + # haversine formula + dlon = lon2 - lon1 + dlat = lat2 - lat1 + a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2 + c = 2 * np.arcsin(np.sqrt(a)) + km = 6367 * c + + return km + + def empirical_cdf(data): """ - Generates an empirical cdf from data. + Generates an empirical cdf from data Parameters ---------- - data : array-like object + data : iterable Empirical data Returns -------- - : DataFrame - Columns 'data' and 'ecdf'. 'data' contains ordered data and 'ecdf' + DataFrame + Columns 'data' and 'ecdf'. 
'data' contains ordered data and 'ecdf' contains the corresponding ecdf values for the data. """ From 8a126bfc25959db62f63f2f67e56fd66536a6617 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 12 Apr 2014 16:52:07 -0700 Subject: [PATCH 233/343] Ensure that index col is not inferred when reading data table --- macroeco/empirical/_empirical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py index 6d29d8d..2880cde 100644 --- a/macroeco/empirical/_empirical.py +++ b/macroeco/empirical/_empirical.py @@ -164,7 +164,7 @@ def _load_table(self, metadata_path, data_path): extension = data_path.split('.')[-1] if extension == 'csv': - full_table = pd.read_csv(data_path) + full_table = pd.read_csv(data_path, index_col=False) table = _subset_table(full_table, self.subset) self.meta = _subset_meta(self.meta, self.subset) elif extension in ['db', 'sql']: From 2b6d69f89146d47f8ff294a52a3ff783f6ede00c Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 12 Apr 2014 16:52:28 -0700 Subject: [PATCH 234/343] Do not subset metadata for categorical columns --- macroeco/empirical/_empirical.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py index 2880cde..bbee293 100644 --- a/macroeco/empirical/_empirical.py +++ b/macroeco/empirical/_empirical.py @@ -292,12 +292,16 @@ def _subset_meta(full_meta, subset): conditions = subset.replace(' ','').split(';') - # TODO: This works for numeric, modify to do nothing for categorical cols for condition in conditions: condition_list = re.split('[<>=]', condition) col = condition_list[0] val = condition_list[-1] - col_step = meta[col]['step'] + + try: + col_step = meta[col]['step'] + except: # If there's no metadata for this col, do nothing + continue + operator = re.sub('[^<>=]', '', condition) if operator == '==': From 2dc4261f2cd76712f5c2c0f17e3458537b231489 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 12 Apr 2014 16:53:22 -0700 Subject: [PATCH 235/343] SAR should return areas as x not fractional areas --- macroeco/empirical/_empirical.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py index bbee293..36cad35 100644 --- a/macroeco/empirical/_empirical.py +++ b/macroeco/empirical/_empirical.py @@ -488,6 +488,9 @@ def _sar_ear_inner(patch, cols, splits, divs, y_func): result_list = [] for substring, subpatch in _yield_subpatches(patch, splits): + # Get A0 + A0 = _patch_area(subpatch, x_col, y_col) + # Loop through all divisions within this split all_spp = np.unique(subpatch.table[spp_col]) subresultx = [] @@ -499,7 +502,7 @@ def _sar_ear_inner(patch, cols, splits, divs, y_func): spatial_table = _yield_spatial_table(subpatch, subdiv, spp_col, count_col, x_col, y_col) subresulty.append(y_func(spatial_table, all_spp)) - subresultx.append(1 / eval(subdiv.replace(',', '*'))) # a frac + subresultx.append(A0 / eval(subdiv.replace(',', '*'))) subresultnspp.append(np.mean(spatial_table['n_spp'])) subresultnindivids.append(np.mean(spatial_table['n_individs'])) @@ -1322,6 +1325,17 @@ def _parse_splits(patch, splits): return [''.join(x)[:-2] for x in _product(*subset_list)] +def _patch_area(patch, x_col, y_col): + + lengths = [] + for col in [x_col, y_col]: + col_step = eval(patch.meta[col]['step']) + col_min = eval(patch.meta[col]['min']) + col_max = eval(patch.meta[col]['max']) + 
lengths.append(col_max - col_min + col_step) + + return lengths[0] * lengths[1] + def _col_starts_ends(patch, col, slices): col_step = eval(patch.meta[col]['step']) # eval converts to float From 768d6ee1fb54a3f74b0c0738a96da49170a8e175 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 12 Apr 2014 16:53:42 -0700 Subject: [PATCH 236/343] Improve formatting of pair column in comm_grid --- macroeco/empirical/_empirical.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py index 36cad35..f6a289f 100644 --- a/macroeco/empirical/_empirical.py +++ b/macroeco/empirical/_empirical.py @@ -577,7 +577,10 @@ def comm_grid(patch, cols, splits, divs, metric='Sorensen'): for i in range(len(spatial_table)): for j in range(i+1, len(spatial_table)): - pair_list.append(str(cell_loc[i]) + '-' + str(cell_loc[j])) + iloc = np.round(cell_loc[i], 6) + jloc = np.round(cell_loc[j], 6) + pair_list.append('('+str(iloc[0])+' '+str(iloc[1])+') - '+ + '('+str(jloc[0])+' '+str(jloc[1])+')') dist_list.append(_distance(cell_loc[i], cell_loc[j])) From 82a49bac9489bf77bbeb9c08bfbf259f0438fd61 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 12 Apr 2014 16:54:07 -0700 Subject: [PATCH 237/343] More informative error for missing column in cols --- macroeco/empirical/_empirical.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py index f6a289f..21399a0 100644 --- a/macroeco/empirical/_empirical.py +++ b/macroeco/empirical/_empirical.py @@ -1232,10 +1232,6 @@ def _get_cols(special_col_names, cols, patch): col_list = cols.split(';') col_dict = {x.split(':')[0]: x.split(':')[1] for x in col_list} - # Check for spp_col - if 'spp_col' not in col_dict.keys(): - raise NameError, ("spp_col not specified") - # Get special_col_names from dict result = [] for special_col_name in special_col_names: @@ -1246,10 +1242,16 @@ def _get_cols(special_col_names, cols, patch): col_name = 'count' patch.table['count'] = np.ones(len(patch.table)) + # All special cols must be specified (count must exist by now) + if col_name is None: + raise ValueError, ("Required column %s not specified" % + special_col_name) + result.append(col_name) return tuple(result), patch + @doc_sub(splits_note) def _yield_subpatches(patch, splits, name='split'): """ From a3e0c222437ee09233b69b77cd925dac26b1c497 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 12 Apr 2014 16:55:25 -0700 Subject: [PATCH 238/343] Better method for locating centers of cells based on start and end --- macroeco/empirical/_empirical.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py index 21399a0..ceefcaa 100644 --- a/macroeco/empirical/_empirical.py +++ b/macroeco/empirical/_empirical.py @@ -1343,12 +1343,12 @@ def _patch_area(patch, x_col, y_col): def _col_starts_ends(patch, col, slices): - col_step = eval(patch.meta[col]['step']) # eval converts to float + col_step = eval(patch.meta[col]['step']) col_min = eval(patch.meta[col]['min']) col_max = eval(patch.meta[col]['max']) - step = (col_max - col_min + col_step) / eval(slices) - starts = np.arange(col_min, col_max + col_step, step) - ends = starts + step + edges = np.linspace(col_min-col_step/2, col_max+col_step/2, eval(slices)+1) + starts = edges[:-1] + ends = edges[1:] return starts, ends From b7c584c0f9a7f9bbbdee79673249236fdcd15748 Mon Sep 17 00:00:00 2001 
From: Justin Kitzes Date: Sat, 12 Apr 2014 16:55:41 -0700 Subject: [PATCH 239/343] Improve formatting of levels in subset strings --- macroeco/empirical/_empirical.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py index ceefcaa..dd50b3e 100644 --- a/macroeco/empirical/_empirical.py +++ b/macroeco/empirical/_empirical.py @@ -1317,8 +1317,11 @@ def _parse_splits(patch, splits): col, val = split.split(':') if val == 'split': - level_list = [col + '==' + str(x) + ';' - for x in np.unique(patch.table[col])] + uniques = [] + for level in patch.table[col]: + if level not in uniques: + uniques.append(level) + level_list = [col + '==' + str(x) + '; ' for x in uniques] else: starts, ends = _col_starts_ends(patch, col, val) level_list = [col + '>=' + str(x) + '; ' + col + '<' + str(y)+'; ' From b6dc8a30d0beec1d45457c3f6cca5b37f9b7e027 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 12 Apr 2014 16:55:53 -0700 Subject: [PATCH 240/343] Remove old distance functions --- macroeco/empirical/_empirical.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py index dd50b3e..6e5b026 100644 --- a/macroeco/empirical/_empirical.py +++ b/macroeco/empirical/_empirical.py @@ -1187,33 +1187,6 @@ def tsed(self, criteria, normalize=True, exponent=0.75): return result -def _distance(pt1, pt2): - """Euclidean distance between two points""" - return np.sqrt((pt1[0] - pt2[0]) ** 2 + (pt1[1] - pt2[1]) ** 2) - - -def decdeg_distance(pt1, pt2): - ''' Calculate Earth surface distance (in km) between decimal latlong points - using Haversine approximation. - - http://stackoverflow.com/questions/15736995/how-can-i-quickly-estimate-the-distance-between-two-latitude-longitude-points - ''' - lat1, lon1 = pt1 - lat2, lon2 = pt2 - - # Convert decimal degrees to radians - lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2]) - - # haversine formula - dlon = lon2 - lon1 - dlat = lat2 - lat1 - a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2 - c = 2 * np.asin(np.sqrt(a)) - km = 6367 * c - - return km - - def _get_cols(special_col_names, cols, patch): """ Retrieve values of special_cols from cols string or patch metadata From 234a47b0795c2716224a76a6265fb56f4ae8b550 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 12 Apr 2014 16:56:39 -0700 Subject: [PATCH 241/343] Complete unit tests for existing empirical functions --- macroeco/empirical/test_empirical.py | 718 +++++++++------------------ 1 file changed, 221 insertions(+), 497 deletions(-) diff --git a/macroeco/empirical/test_empirical.py b/macroeco/empirical/test_empirical.py index 630de38..45ab26f 100644 --- a/macroeco/empirical/test_empirical.py +++ b/macroeco/empirical/test_empirical.py @@ -1,452 +1,250 @@ -''' -Unit tests for empirical.py -''' - from __future__ import division +import os +from configparser import ConfigParser from numpy.testing import (TestCase, assert_equal, assert_array_equal, assert_almost_equal, assert_array_almost_equal, assert_allclose, assert_, assert_raises) +from pandas.util.testing import (assert_frame_equal) -from macroeco.empirical import * +import macroeco.empirical as emp +import macroeco.empirical._empirical as _emp import numpy as np +import pandas as pd import scipy.stats as stats -import numpy.testing as nt -class TestEmpiricalCDF(TestCase): + +class Patches(TestCase): + + def setUp(self): + local_path = 
os.path.dirname(os.path.abspath(__file__)) + + self.meta1_path = os.path.join(local_path, 'test_meta1.txt') + self.table1_path = os.path.join(local_path, 'test_table1.csv') + self.table1 = pd.DataFrame.from_csv(self.table1_path, index_col=False) + self.meta1 = ConfigParser() + self.meta1.read(self.meta1_path) + self.pat1 = emp.Patch(self.meta1_path) # No subset + self.cols1 = 'spp_col:spp; count_col:count; x_col:x; y_col:y' + self.A1 = 0.2 * 0.3 + + +class TestPatch(Patches): + + def test_load_data_meta(self): + assert_array_equal(self.pat1.table, self.table1) + assert_equal(self.pat1.meta, self.meta1) + + def test_subset_numeric(self): + pat1 = emp.Patch(self.meta1_path, 'x>=0.2') + assert_array_equal(pat1.table, self.table1[self.table1.x >= 0.2]) + + self.meta1['x']['min'] = '0.2' + assert_equal(pat1.meta, self.meta1) + + def test_subset_categorical(self): + pat1 = emp.Patch(self.meta1_path, "spp=='b'") + assert_array_equal(pat1.table, self.table1[self.table1['spp']=='b']) + assert_equal(pat1.meta, self.meta1) # Meta should not change + + def test_multiple_subset(self): + # Only first element in table remains + pat1 = emp.Patch(self.meta1_path, "spp=='a' ; y < 0.2") + assert_array_equal(pat1.table.iloc[0], self.table1.iloc[0]) + assert_equal(len(pat1.table), 1) + + self.meta1['y']['max'] = '0.1' + assert_equal(pat1.meta, self.meta1) + + +class TestSAD(Patches): + + def test_simple(self): + # Falling back on spp_col in metadata, so count 1 for each row + sad = emp.sad(self.pat1, None, None) + assert_equal(sad[0][1]['y'], [3,2]) + + def test_simple_with_cols(self): + # Specify count and spp_col here + sad = emp.sad(self.pat1, self.cols1, None) + assert_equal(sad[0][1]['y'], [4,3]) + + def test_two_way_split(self): + # Complete split generates 6 results + sad = emp.sad(self.pat1, self.cols1, 'x:2; y:3') + assert_equal(len(sad), 6) + + # Goes through x then y + assert_equal(sad[0][1]['spp'].values, 'a') + assert_equal(sad[0][1]['y'].values, 2) + assert_equal(sad[1][1]['y'].values, [1,1]) + assert_equal(sad[5][1]['spp'].values, 'b') + assert_equal(sad[0][1]['y'].values, 2) + + def test_one_way_uneven_split(self): + # 0.2 should fall in second division of y + sad = emp.sad(self.pat1, self.cols1, 'y:2') + print sad + assert_equal(len(sad), 2) + assert_equal(sad[0][1]['spp'].values, ['a']) + assert_equal(sad[0][1]['y'].values, [2]) + assert_equal(sad[1][1]['spp'].values, ['a','b']) + assert_equal(sad[1][1]['y'].values, [2,3]) + + def test_split_categorical(self): + sad = emp.sad(self.pat1, self.cols1, 'year:split; x:2') + assert_equal(sad[0][1]['y'].values, 3) + assert_equal(sad[1][1]['y'].values, []) + assert_equal(sad[2][1]['y'].values, [1,1]) + assert_equal(sad[3][1]['y'].values, [2]) + + def test_clean(self): + # No a in second split on x + sad = emp.sad(self.pat1, self.cols1, 'x:2', clean=False) + assert_equal(len(sad[1][1]), 2) # Both spp when clean False + + sad = emp.sad(self.pat1, self.cols1, 'x:2', clean=True) + assert_equal(len(sad[1][1]), 1) # Only 'b' when clean True + + +class TestSSAD(Patches): + + def test_no_splits(self): + # Just total abundance by species + ssad = emp.ssad(self.pat1, self.cols1, None) + assert_equal(ssad[0][1]['y'], [4]) + assert_equal(ssad[1][1]['y'], [3]) + + def test_with_split(self): + ssad = emp.ssad(self.pat1, self.cols1, 'x:2') + assert_equal(ssad[0][1]['y'], [4,0]) # spp a + assert_equal(ssad[1][1]['y'], [1,2]) # spp b + + +class TestSAR(Patches): + + def test_no_splits(self): + sar = emp.sar(self.pat1, self.cols1, None, '1,1; 2,1; 2,3') + 
assert_almost_equal(sar[0][1]['x'], + [1*self.A1, 0.5*self.A1, 1/6*self.A1]) + assert_equal(sar[0][1]['y'], [2, 1.5, (1+2+1+0+0+1)/6.]) + + def test_with_split(self): + sar = emp.sar(self.pat1, self.cols1, 'year:split', '2,1; 1,3') + assert_almost_equal(sar[0][1]['x'], [0.5*self.A1, 1/3.*self.A1]) + assert_almost_equal(sar[1][1]['x'], [0.5*self.A1, 1/3.*self.A1]) + assert_equal(sar[0][1]['y'], [0.5, 2/3.]) + assert_equal(sar[1][1]['y'], [3/2., 1]) + + def test_single_division(self): + sar = emp.sar(self.pat1, self.cols1, None, '2,1') + assert_almost_equal(sar[0][1]['x'], [0.5*self.A1]) + assert_equal(sar[0][1]['y'], [1.5]) + + +class TestEAR(Patches): + + def test_no_splits(self): + sar = emp.sar(self.pat1, self.cols1, None, '1,1; 2,1; 2,3', ear=True) + assert_equal(sar[0][1]['y'], [2, 0.5, 0]) + + def test_with_split(self): + sar = emp.sar(self.pat1, self.cols1, 'year:split', '2,1;1,3', ear=True) + assert_equal(sar[0][1]['y'], [0.5, 0]) + assert_equal(sar[1][1]['y'], [0.5, 1/3.]) + + +class TestCommGrid(Patches): + + def test_no_splits_Sorensen(self): + comm = emp.comm_grid(self.pat1, self.cols1, None, '2,1') + assert_almost_equal(comm[0][1]['x'], [0.1]) + assert_equal(comm[0][1]['y'], [2./(2+1)]) + + def test_no_splits_Jaccard(self): + comm = emp.comm_grid(self.pat1, self.cols1, None, '2,1', + metric='Jaccard') + assert_almost_equal(comm[0][1]['x'], [0.1]) + assert_equal(comm[0][1]['y'], [1/2.]) + + def test_with_split(self): + comm = emp.comm_grid(self.pat1, self.cols1, 'year:split', '2,1') + assert_equal(comm[0][1]['y'], [0]) + assert_equal(comm[1][1]['y'], [2/3.]) + + def test_y_division_even(self): + comm = emp.comm_grid(self.pat1, self.cols1, '', '1,3') + assert_equal(comm[0][1]['pair'], ['(0.15 0.1) - (0.15 0.2)', + '(0.15 0.1) - (0.15 0.3)', + '(0.15 0.2) - (0.15 0.3)']) + assert_almost_equal(comm[0][1]['x'], [0.1, 0.2, 0.1]) + assert_equal(comm[0][1]['y'], [2/3., 2/3., 1.]) + + def test_x_y_division_uneven_y(self): + comm = emp.comm_grid(self.pat1, self.cols1, '', '2,2') + print comm + assert_equal(comm[0][1]['pair'], ['(0.1 0.125) - (0.1 0.275)', + '(0.1 0.125) - (0.2 0.125)', + '(0.1 0.125) - (0.2 0.275)', + '(0.1 0.275) - (0.2 0.125)', + '(0.1 0.275) - (0.2 0.275)', + '(0.2 0.125) - (0.2 0.275)']) + assert_almost_equal(comm[0][1]['x'], [0.15, 0.1, 0.180278, 0.180278, + 0.1, 0.15], 6) + assert_equal(comm[0][1]['y'], [2/3., 0, 0, 0, 2/3., 0]) + + def test_x_y_division_uneven_y_jaccard(self): + comm = emp.comm_grid(self.pat1, self.cols1, '', '2,2',metric='Jaccard') + assert_equal(comm[0][1]['y'], [1/2., 0, 0, 0, 1/2., 0]) + + +class TestProduct(): + + def test_product_with_order(self): + # Several places rely on product to sequentially loop first -> last + expected = [[1,5], [1,6], [1,7], [2,5], [2,6], [2,7]] + assert_equal(_emp._product([1,2],[5,6,7]), expected) + + +class TestDistance(): + + def test_cartesian_distance(self): + assert_equal(_emp._distance((0,0),(2,2)), np.sqrt(8)) + + +class TestDecDegDistance(): + + def test_ucberkeley_to_sf(self): + # Latlong: http://www.findlatitudeandlongitude.com + # Dist: http://www.movable-type.co.uk/scripts/latlong.html (17.37 km) + berkeley = (37.87133, -122.259293) + sf = (37.780213, -122.419968) + assert_almost_equal(_emp._decdeg_distance(berkeley, sf), 17.37, 1) + + +class TestEmpiricalCDF(): def test_sorted_data(self): test_data = [1, 1, 1, 1, 2, 3, 4, 5, 6, 6] ans = [.4, .4, .4, .4, .5, .6, .7, .8, 1, 1] - res = empirical_cdf(test_data) + res = emp.empirical_cdf(test_data) assert_array_equal(ans, res['ecdf']) def 
test_unsorted_data(self): test_data = [6, 6, 1, 1, 5, 1, 1, 2, 3, 4] ans = [.4, .4, .4, .4, .5, .6, .7, .8, 1, 1] - res = empirical_cdf(test_data) + res = emp.empirical_cdf(test_data) assert_array_equal(ans, res['ecdf']) # Result sorted assert_array_equal(np.sort(test_data), res['data']) # Data sorted def test_all_data_same(self): test_data = [3, 3, 3, 3] ans = [1, 1, 1, 1] - res = empirical_cdf(test_data) + res = emp.empirical_cdf(test_data) assert_array_equal(ans, res['ecdf']) -# class TestPatch(unittest.TestCase): - -# def setUp(self): -# self.xyfile5 = open('xyfile5.csv','w') -# self.xyfile5.write('''spp_code, x, y, count -# grt, .1, .1, 2 -# grt, .1, .2, 1 -# grt, .1, .3, 1 -# rty, .1, .2, 1 -# rty, .2, .3, 2''') -# self.xyfile5.close() -# self.xymeta5 = {('x', 'maximum'): .2, ('x', 'minimum'): .1, ('x', -# 'precision'): .1, ('x', 'type'): 'interval', ('y', 'maximum'): .3, -# ('y', 'minimum'): .1, ('y', 'precision'): .1, ('y', 'type'): 'interval', -# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, -# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', -# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', -# 'precision'): None, ('count', 'type'): 'ratio'} - -# self.pat1 = Patch('xyfile5.csv') -# # Line below sets metadata manually-no metadata file loaded -# self.pat1.data_table.meta = self.xymeta5 - -# self.xyfile6 = open('xyfile6.csv', 'w') -# self.xyfile6.write('''spp_code, x, y, count -# a, 0, 0, 1 -# b, 0, 0, 1 -# c, 0, 0, 0 -# d, 0, 0, 3 -# a, 0, 1, 0 -# b, 0, 1, 4 -# c, 0, 1, 0 -# d, 0, 1, 1 -# a, 1, 0, 1 -# b, 1, 0, 0 -# c, 1, 0, 3 -# d, 1, 0, 1 -# a, 1, 1, 0 -# b, 1, 1, 1 -# c, 1, 1, 3 -# d, 1, 1, 1''') -# self.xyfile6.close() -# self.xymeta6 = {('x', 'maximum'): 1, ('x', 'minimum'): 0, ('x', -# 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 1, -# ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', -# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, -# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', -# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', -# 'precision'): None, ('count', 'type'): 'ratio'} -# self.pat2 = Patch('xyfile6.csv') -# self.pat2.data_table.meta = self.xymeta6 - -# self.xyfile7 = open('xyfile7.csv', 'w') -# self.xyfile7.write('''spp_code, x, y, count -# tery, 1, 1, 1 -# 1, 1, 1, 1 -# 2, 1, 1, 0 -# 3, 1, 1, 3 -# 0, 1, 2, 0 -# 1, 1, 2, 4 -# 2, 1, 2, 0 -# tery, 1, 2, 1 -# 0, 2, 1, 1 -# 1, 2, 1, 0 -# 2, 2, 1, 3 -# 3, 2, 1, 1 -# tery, 2, 2, 0 -# 1, 2, 2, 1 -# 2, 2, 2, 3 -# 3, 2, 2, 1''') -# self.xyfile7.close() -# self.xymeta7 = {('x', 'maximum'): 2, ('x', 'minimum'): 1, ('x', -# 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 2, -# ('y', 'minimum'): 1, ('y', 'precision'): 1, ('y', 'type'): 'interval', -# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, -# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', -# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', -# 'precision'): None, ('count', 'type'): 'ratio'} -# self.pat3 = Patch('xyfile7.csv') -# self.pat3.data_table.meta = self.xymeta7 - -# self.xyfile8 = open('xyfile8.csv', 'w') -# self.xyfile8.write('''spp_code, x, y, count -# 0, 0, 0, 1 -# 1, 0, 0, 1 -# 2, 0, 0, 0 -# 3, 0, 0, 3 -# 0, 0, 1, 0 -# 1, 0, 1, 4 -# 2, 0, 1, 0 -# 3, 0, 1, 1 -# 0, 1, 0, 1 -# 1, 1, 0, 0 -# 2, 1, 0, 3 -# 3, 1, 0, 1 -# 0, 1, 1, 0 -# 1, 1, 1, 1 -# 2, 1, 1, 3 -# 3, 1, 1, 1 -# 0, 2, 0, 0 -# 1, 2, 0, 0 -# 2, 2, 0, 2 -# 3, 2, 0, 4 -# 0, 2, 1, 0 -# 1, 2, 1, 0 -# 2, 
2, 1, 0 -# 3, 2, 1, 1''') -# self.xyfile8.close() -# self.xymeta8 = {('x', 'maximum'): 2, ('x', 'minimum'): 0, ('x', -# 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 1, -# ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', -# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, -# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', -# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', -# 'precision'): None, ('count', 'type'): 'ratio'} -# self.pat4 = Patch('xyfile8.csv') -# self.pat4.data_table.meta = self.xymeta8 -# self.xyfile9 = open('xyfile9.csv','w') -# self.xyfile9.write('''spp_code, x, y, count, energy, mass -# grt, .1, .1, 2, 1, 34 -# grt, .1, .2, 1, 2, 12 -# grt, .1, .3, 1, 3, 23 -# rty, .1, .2, 1, 4, 45 -# rty, .2, .3, 1, 5, 110''') -# self.xyfile9.close() -# self.xymeta9 = {('x', 'maximum'): .2, ('x', 'minimum'): .1, ('x', -# 'precision'): .1, ('x', 'type'): 'interval', ('y', 'maximum'): .3, -# ('y', 'minimum'): .1, ('y', 'precision'): .1, ('y', 'type'): 'interval', -# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, -# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', -# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', -# 'precision'): None, ('count', 'type'): 'ratio'} - -# self.pat5 = Patch('xyfile9.csv') -# self.pat5.data_table.meta = self.xymeta9 -# self.xyfile10 = open('xyfile10.csv', 'w') -# self.xyfile10.write('''spp_code, x, y, count -# a, 0, 0, 1 -# b, 0, 0, 1 -# d, 0, 0, 3 -# b, 0, 1, 4 -# d, 0, 1, 1 -# a, 1, 0, 1 -# c, 1, 0, 3 -# d, 1, 0, 1 -# b, 1, 1, 1 -# c, 1, 1, 3 -# d, 1, 1, 1''') -# self.xyfile10.close() -# self.xymeta10 = {('x', 'maximum'): 1, ('x', 'minimum'): 0, ('x', -# 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 1, -# ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', -# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, -# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', -# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', -# 'precision'): None, ('count', 'type'): 'ratio'} -# self.pat6 = Patch('xyfile10.csv') -# self.pat6.data_table.meta = self.xymeta10 -# self.xyfile11 = open('xyfile11.csv', 'w') -# self.xyfile11.write('''spp_code, x, y, count, reptile -# a, 0, 0, 1, lizard -# b, 0, 0, 1, lizard -# d, 0, 0, 3, snake -# b, 0, 1, 4, lizard -# d, 0, 1, 1, turtle -# a, 1, 0, 1, snake -# c, 1, 0, 3, lizard -# d, 1, 0, 1, snake -# b, 1, 1, 1, tuatara -# c, 1, 1, 3, turtle -# d, 1, 1, 1, snake''') -# self.xyfile11.close() -# self.xymeta11 = {('x', 'maximum'): 1, ('x', 'minimum'): 0, ('x', -# 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 1, -# ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', -# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, -# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', -# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', -# 'precision'): None, ('count', 'type'): 'ratio', ('reptile', 'maximum') -# : None, ('reptile', 'minimum') : None, ('reptile', 'precision'):None, -# ('reptile', 'type') : 'ordinal'} -# self.pat7 = Patch('xyfile11.csv') -# self.pat7.data_table.meta = self.xymeta11 - -# self.xyfile12 = open('xyfile12.csv', 'w') -# self.xyfile12.write('''spp_code, x, y, count -# 3, 0, 0, 3 -# 3, 0, 1, 1 -# 2, 0, 2, 3 -# 1, 0, 3, 8 -# 3, 1, 0, 1 -# 3, 1, 1, 1 -# 0, 1, 2, 5 -# 3, 1, 3, 1 -# 2, 2, 0, 1 -# 1, 2, 1, 3 -# 1, 2, 2, 6 -# 0, 2, 3, 1 -# 1, 3, 0, 9 -# 2, 
3, 1, 1 -# 0, 3, 2, 3 -# 3, 3, 3, 1''') -# self.xyfile12.close() -# self.xymeta12 = {('x', 'maximum'): 3, ('x', 'minimum'): 0, ('x', -# 'precision'): 1, ('x', 'type'): 'interval', ('y', 'maximum'): 3, -# ('y', 'minimum'): 0, ('y', 'precision'): 1, ('y', 'type'): 'interval', -# ('spp_code', 'maximum'): None, ('spp_code', 'minimum'): None, -# ('spp_code', 'precision'): None, ('spp_code', 'type'): 'ordinal', -# ('count', 'maximum'): None, ('count', 'minimum'): None, ('count', -# 'precision'): None, ('count', 'type'): 'ratio'} -# self.pat8 = Patch('xyfile12.csv') -# self.pat8.data_table.meta = self.xymeta12 - -# # Data file with three count colums, unique row for each species -# self.xyfile13 = open('xyfile13.csv', 'w') -# self.xyfile13.write('''spp_code, order, plot1, plot2, plot3 -# a, pred, 0, 0, 0 -# b, pred, 0, 0, 1 -# c, pred, 0, 1, 0 -# d, pred, 0, 2, 3 -# e, scav, 0, 1, 0 -# f, scav, 0, 1, 4''') -# self.xyfile13.close() -# self.xymeta13 = {('spp_code', 'maximum'): None, -# ('spp_code', 'minimum'): None, -# ('spp_code', 'precision'): None, -# ('spp_code', 'type'): 'ordinal', -# ('order', 'maximum'): None, -# ('order', 'minimum'): None, -# ('order', 'precision'): None, -# ('order', 'type'): 'ordinal', -# ('plot1', 'maximum'): None, -# ('plot1', 'minimum'): None, -# ('plot1', 'precision'): None, -# ('plot1', 'type'): 'ratio', -# ('plot2', 'maximum'): None, -# ('plot2', 'minimum'): None, -# ('plot2', 'precision'): None, -# ('plot2', 'type'): 'ratio', -# ('plot3', 'maximum'): None, -# ('plot3', 'minimum'): None, -# ('plot3', 'precision'): None, -# ('plot3', 'type'): 'ratio'} -# self.pat9 = Patch('xyfile13.csv') -# self.pat9.data_table.meta = self.xymeta13 - - - - -# def tearDown(self): -# os.remove('xyfile5.csv') -# os.remove('xyfile6.csv') -# os.remove('xyfile7.csv') -# os.remove('xyfile8.csv') -# os.remove('xyfile9.csv') -# os.remove('xyfile10.csv') -# os.remove('xyfile11.csv') -# os.remove('xyfile12.csv') -# os.remove('xyfile13.csv') - -# # -# # init and set_attributes -# # - -# def test_patch_init(self): - -# # Test entire table is loaded -# self.assertTrue(len(self.pat1.data_table.table) == 5) -# self.assertTrue(len(self.pat2.data_table.table) == 16) - -# # Test that subsetting works -# pat = Patch('xyfile6.csv', {'spp_code': [('!=','a'), ('!=', 'b'), -# ('!=','c')]}) -# self.assertTrue(np.all(pat.data_table.table['spp_code'] == 'd')) -# pat = Patch('xyfile7.csv', {'spp_code': ('==', "tery")}) -# self.assertTrue(sum(pat.data_table.table['count']) == 2) - -# # Testing that metadata was set correctly -# self.assertTrue(self.pat1.data_table.meta[('x', 'maximum')] == .2) - -# def test_sad(self): - -# # Test correct result with 'whole' and one division -# sad = self.pat1.sad({'spp_code': 'species', 'count': 'count', -# 'x': 1}) -# self.assertTrue(np.array_equal(sad[0][1], np.array([4,3]))) -# sad = self.pat1.sad({'spp_code': 'species', 'count': 'count', -# 'x': 'whole'}) -# self.assertTrue(np.array_equal(sad[0][1], np.array([4,3]))) -# sad = self.pat4.sad({'spp_code': 'species', 'count' :'count', 'x': 1}) -# self.assertTrue(np.array_equal(sad[0][2], np.array([0,1,2,3]))) - -# # Test correct result with other divisions -# sad = self.pat4.sad({'spp_code': 'species', 'count': 'count', 'x': 3, -# 'y': 2}) -# self.assertTrue(np.array_equal(sad[-1][1], np.array([0,0,0,1]))) - -# # Test that 'whole' and ignore give the same result -# sad1 = self.pat4.sad({'spp_code': 'species', 'count': 'count'}) -# sad2 = self.pat4.sad({'spp_code': 'species', 'count': 'count', 'x' : -# 'whole'}) -# 
self.assertTrue(np.array_equal(sad1[0][1], sad2[0][1])) - -# # Test that 'split' keyword returns the correct results -# sad = self.pat5.sad({'spp_code' :'species', 'energy':'split', 'count' -# : 'count'}) -# self.assertTrue(len(sad) == 5) -# self.assertTrue(np.array_equal(sad[0][1], np.array([2,0]))) - -# # Test split and clean on numeric column -# sad = self.pat5.sad({'spp_code' :'species', 'energy':'split', 'count' -# : 'count'}, clean=True) -# self.assertTrue(len(sad) == 5) -# self.assertTrue(np.array_equal(sad[0][1], np.array([2]))) - -# # Test that cleaning sad and split works on string -# sad = self.pat7.sad({'spp_code' : 'species', 'count' : 'count', -# 'reptile' : 'split'}, clean=True) -# self.assertTrue(len(sad) == 4) -# self.assertTrue(np.array_equal(sad[0][1], np.array([1,5,3]))) -# self.assertTrue(np.array_equal(sad[2][1], np.array([1]))) -# self.assertTrue(sad[2][2][0] == 'b') - -# def test_parse_criteria(self): - -# # Checking parse returns what we would expect -# pars = self.pat4.parse_criteria({'spp_code': 'species', 'count': 'count', -# 'x': 1}) -# self.assertTrue(pars[1] == 'spp_code') -# self.assertTrue(pars[2] == 'count') - -# # Test that energy, mass and count col are None -# pars = self.pat4.parse_criteria({'spp_code': 'species', -# 'y': 'whole'}) -# self.assertTrue((pars[2] == None) and (pars[3] == None) and (pars[4] == -# None)) - -# # If species is not specified correctly an error is thrown -# self.assertRaises(ValueError, self.pat3.parse_criteria, {'spp_col' -# :'species'}) -# # Make sure if count is not passed, no error is thrown -# self.pat3.parse_criteria({'spp_code': 'species'}) - -# # Check energy and mass returns -# pars = self.pat5.parse_criteria({'spp_code': 'species', 'count': -# 'count', 'energy': 'energy'}) - -# self.assertTrue(pars[3] == 'energy') -# self.assertTrue(pars[4] == None) - -# # Check that combinations in empty dict if no criteria given -# pars = self.pat5.parse_criteria({'spp_code': 'species', 'count': -# 'count'}) -# self.assertTrue(pars[5] == [{}]) - -# # TODO: Test that error is thrown if step < prec - -# def test_sar(self): - -# # Checking that sar function returns correct S0 for full plot -# sar = self.pat3.sar(('x', 'y'), [(1,1)], {'spp_code': 'species', -# 'count': 'count'}) -# self.assertTrue(sar[0]['items'][0] == 5) - -# # Checking for correct result for sar -# sar = self.pat3.sar(('x', 'y'), [(1,1), (2,2)], {'spp_code': 'species', -# 'count': 'count'}) -# self.assertTrue(np.array_equal(sar[1][1], np.array([3,3,2,3]))) -# sar = self.pat4.sar(('x', 'y'), [(1,1), (1,2), (3,2)], {'spp_code': -# 'species', 'count': 'count'}, form='sar') -# self.assertTrue(np.array_equal(sar[1][2], np.array([3,3,2,2,3,1]))) - -# # Checking for correct result for ear -# ear = self.pat3.sar(('x', 'y'), [(1,1), (2,2)], {'spp_code': 'species', -# 'count': 'count'}, form='ear') -# self.assertTrue(np.array_equal(ear[1][1], np.array([0,1,0,0]))) - -# # Test that returned areas are correct -# sar = self.pat1.sar(('x', 'y'), [(1,1)], {'spp_code': 'species', -# 'count': 'count'}) -# self.assertTrue(np.round(sar[0]['area'][0], decimals=2) == 0.06) -# self.assertTrue(sar[0]['items'][0] == 2) - -# def test_universal_sar(self): - -# # Check that it returns the right length -# criteria = {'spp_code': 'species', 'count' : 'count'} -# div_cols = ('x', 'y') -# vals = self.pat8.universal_sar(div_cols, [(1,1), (1,2), (2,2), (2,4), -# (4,4)], criteria) -# self.assertTrue(len(vals) == 3) - -# # If (1,1) is not passed in it should have a length of zero -# vals = 
self.pat8.universal_sar(div_cols, [(1,2), (2,2)], criteria) -# self.assertTrue(len(vals) == 0) - -# # If (1,1) is not passed in but include_full == True should have len -# # equal to 1 -# vals = self.pat8.universal_sar(div_cols, [(1,2), (2,2), (2,4)], -# criteria, -# include_full=True) -# self.assertTrue(len(vals) == 2) - -# # Test that I get the correct z-value back -# vals = self.pat8.universal_sar(div_cols, [(1,1), (1,2), (2,2)], -# criteria) -# self.assertTrue(np.round(vals['z'][0], decimals=4) == 0.3390) - -# # If I pass in something other than a halving I should still get -# # something back -# vals = self.pat8.universal_sar(div_cols, [(1,1), (2,2), (2,4), (4,4)], -# criteria) -# self.assertTrue(len(vals) == 2) + + # def test_comm_sep(self): @@ -525,77 +323,3 @@ def test_all_data_same(self): # density=True) # np.testing.assert_array_almost_equal(result_list[0][2][1], # np.array((1358.12218105,0))) - -# def test_ssad(self): - -# # Check that ssad does not lose any individuals -# ssad = self.pat2.ssad({'spp_code': 'species', 'count': 'count'}) -# sad = self.pat2.sad({'spp_code': 'species', 'count': 'count'}) -# sum_ssad = np.array([sum(val) for val in ssad[1].itervalues()]) -# self.assertTrue(sum(sad[0][1]) == sum(sum_ssad)) - -# ssad = self.pat6.ssad({'spp_code': 'species', 'count': 'count'}) -# sad = self.pat6.sad({'spp_code': 'species', 'count': 'count'}) -# sum_ssad = np.array([sum(val) for val in ssad[1].itervalues()]) -# self.assertTrue(sum(sad[0][1]) == sum(sum_ssad)) - -# # Manual checks of correct ssad -# ssad = self.pat2.ssad({'spp_code': 'species', 'count': 'count', 'x': -# 2, 'y': 2}) -# self.assertTrue(set(ssad[1]['a']) == {1, 0, 1, 0}) -# self.assertTrue(set(ssad[1]['b']) == {1, 4, 0, 1}) -# self.assertTrue(set(ssad[1]['c']) == {0, 0, 3, 3}) -# self.assertTrue(set(ssad[1]['d']) == {3, 1, 1, 1}) - -# ssad = self.pat6.ssad({'spp_code': 'species', 'count': 'count', 'x' : -# 2, 'y': 2}) -# self.assertTrue(set(ssad[1]['a']) == {1, 0, 1, 0}) -# self.assertTrue(set(ssad[1]['b']) == {1, 4, 0, 1}) -# self.assertTrue(set(ssad[1]['c']) == {0, 0, 3, 3}) -# self.assertTrue(set(ssad[1]['d']) == {3, 1, 1, 1}) - -# def test_ied(self): - -# # Test correct length of result -# eng = self.pat5.ied({'spp_code': 'species', 'count': 'count', -# 'energy': 'energy'}) -# self.assertTrue(len(eng[0][1]) == 6) - -# # Test error if energy column is missing -# self.assertRaises(ValueError, self.pat5.ied, -# {'spp_code': 'species', 'count': 'count'}) - -# # Test normalize is working -# eng = self.pat5.ied({'spp_code': 'species', 'count': 'count', -# 'energy': 'energy', 'x': 2}) -# self.assertTrue(np.array_equal(eng[1][1], np.array([1]))) -# self.assertTrue(len(eng[0][1]) == 5) - -# # Test mass column and normalize -# eng = self.pat5.ied({'spp_code': 'species', 'count': 'count', -# 'mass' : 'mass'}, exponent=1, normalize=False) -# self.assertTrue(np.array_equal(eng[0][1], np.array([17,17,12,23,45, -# 110]))) - -# # Test that energy overrides mass -# eng = self.pat5.ied({'spp_code': 'species', 'count': 'count', -# 'mass' : 'mass', 'energy' : 'energy'}, normalize=False) -# self.assertTrue(np.array_equal(eng[0][1], np.array([.5,.5,2,3,4,5]))) - -# def test_sed(self): - -# # Check correct result -# eng = self.pat5.sed({'spp_code': 'species', 'count': 'count', -# 'energy': 'energy'}) -# self.assertTrue(np.array_equal(eng[0][1]['grt'], -# np.array([1,1,4,6]))) -# self.assertTrue(np.array_equal(eng[0][1]['rty'], -# np.array([8,10]))) - -# eng = self.pat5.sed({'spp_code': 'species', 'count': 'count', -# 
'energy': 'energy', 'x': 2}) -# self.assertTrue(np.array_equal(eng[1][1]['rty'], np.array([1]))) -# self.assertTrue(len(eng[1][1]) == 2) - -# if __name__ == "__main__": -# unittest.main() From bf06f4b127a758be548510d48eea08f0c40c2d45 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 12 Apr 2014 17:31:20 -0700 Subject: [PATCH 242/343] Supporting test files for test_empirical --- macroeco/empirical/test_meta1.txt | 14 ++++++++++++++ macroeco/empirical/test_table1.csv | 6 ++++++ 2 files changed, 20 insertions(+) create mode 100644 macroeco/empirical/test_meta1.txt create mode 100644 macroeco/empirical/test_table1.csv diff --git a/macroeco/empirical/test_meta1.txt b/macroeco/empirical/test_meta1.txt new file mode 100644 index 0000000..a8e157e --- /dev/null +++ b/macroeco/empirical/test_meta1.txt @@ -0,0 +1,14 @@ +[Description] +name = Test Table 1 +datapath = test_table1.csv +cols = spp_col:spp + +[x] +min = 0.1 +max = 0.2 +step = 0.1 + +[y] +min = 0.1 +max = 0.3 +step = 0.1 \ No newline at end of file diff --git a/macroeco/empirical/test_table1.csv b/macroeco/empirical/test_table1.csv new file mode 100644 index 0000000..60a9107 --- /dev/null +++ b/macroeco/empirical/test_table1.csv @@ -0,0 +1,6 @@ +spp,x,y,count,year +a,.1,.1,2,2000 +a,.1,.2,1,2000 +a,.1,.3,1,2010 +b,.1,.2,1,2010 +b,.2,.3,2,2010 \ No newline at end of file From e002b9e6604d1258e56e14254adf0dd9196e14ba Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sun, 13 Apr 2014 08:08:03 -0700 Subject: [PATCH 243/343] Included mete_sar in init --- macroeco/models/__init__.py | 7 ++-- macroeco/models/_distributions.py | 54 +++++++++++++++++++++++++-- macroeco/models/test_distributions.py | 36 ++++++++++++++++++ 3 files changed, 91 insertions(+), 6 deletions(-) diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py index 7860e54..7e1d039 100644 --- a/macroeco/models/__init__.py +++ b/macroeco/models/__init__.py @@ -32,12 +32,13 @@ nbinom cnbinom logser_uptrunc + lognorm """ -from ._distributions import (geom, geom_uptrunc, nbinom, cnbinom, - logser_uptrunc, - expon, expon_uptrunc) +from _distributions import (geom, geom_uptrunc, nbinom, cnbinom, + logser_uptrunc, expon, expon_uptrunc, + lognorm) from ._curves import (power_law, mete_sar, mete_iterative_sar, diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 85b5b65..9a15fa9 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -460,7 +460,7 @@ def nbinom_nll(data, k_agg, mu): class cnbinom_gen(rv_discrete_meco): r""" - The conditional negative binomial random variable + The conditional negative binomial random variable. This distribution was described by Zillio and He (2010) [#]_ and Conlisk et al. (2007) [#]_ @@ -554,7 +554,7 @@ def _ln_choose(n, k_agg): class logser_uptrunc_gen(rv_discrete_meco): r""" - Upper truncated logseries random variable + Upper truncated logseries random variable. This distribution was described by Harte (2011) [#]_ @@ -666,7 +666,7 @@ def _trunc_logser_solver(bins, b): bins : float Number of bins. Considered S in an ecological context b : float - Upper truncation of distribution + Upper truncation of distribution. Considered N in an ecological context Returns ------- @@ -811,6 +811,54 @@ def _stats(self, lam, b): expon_uptrunc = expon_uptrunc_gen(a=0.0, name='expon_uptrunc', shapes='lam, b') +class lognorm_gen(rv_continuous_meco): + r""" + A lognormal random variable. + + .. 
math:: + + f(x) = \frac{1}{\sigma x \sqrt{2 \pi}} e^{(\log{x} - \mu)^2 / 2 + \sigma^2} + + Methods + ------- + translate_args(mean, sigma) + Shape parameters mu and sigma given mean and sigma + fit_mle(data, b=sum(data)) + ML estimate of shape parameters mu and sigma + %(before_notes)s + mu : float + mu parameter of lognormal distribution. Mean log(x) + sigma : float + sigma parameter of lognormal distribution. sd of log(x) + + """ + + @inherit_docstring_from(rv_continuous_meco) + def translate_args(self, mean, sigma): + return np.log(mean) - (sigma ** 2 / 2), sigma + + @inherit_docstring_from(rv_continuous_meco) + def fit_mle(self, data, mean=None): + + sigma, _, scale = stats.lognorm.fit(data, floc=0) + return np.log(scale), sigma + + def _rvs(self, mu, sigma): + return stats.lognorm.rvs(sigma, scale=np.exp(mu)) + + def _pdf(self, x, mu, sigma): + return stats.lognorm.pdf(x, sigma, scale=np.exp(mu)) + + def _cdf(self, x, mu, sigma): + return stats.lognorm.cdf(x, sigma, scale=np.exp(mu)) + + def _stats(self, mu, sigma): + return stats.lognorm.stats(sigma, scale=np.exp(mu)) + +lognorm = lognorm_gen(name="lognorm", shapes="mu, sigma") + + def _solve_k_from_mu(data, k_range, nll, *args): """ For given args, return k_agg from searching some k_range. diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index d6556f4..7e12b7f 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -280,6 +280,42 @@ def test_n_close_to_s(self): _trunc_logser_solver(3, 4) _trunc_logser_solver(100, 101) +class TestLognorm(TestCase): + + def test_pmf(self): + # R pmf: dlnorm(c(1:10), 2, 2) + r_output = np.array([0.1210, .0806, .0601, 0.0476, 0.0391, .0331, + 0.0285, 0.0249, 0.0221, 0.0197]) + + test1 = lognorm.pdf(np.arange(1, 11), 2, 2) + assert_array_almost_equal(test1, r_output, decimal=4) + + def test_cdf(self): + # R cdf: plnorm(c(1,1,4,5,12), 1.2, 3.45) + r_output = np.array([0.3639854, 0.3639854, 0.5215318, 0.5472346, + 0.6452161]) + + test = lognorm.cdf([1, 1, 4, 5, 12], 1.2, 3.45) + assert_array_almost_equal(test, r_output, decimal=7) + + def test_translate_args(self): + + mean = 67; sigma = 2 + mu, sigma = lognorm.translate_args(mean, sigma) + + # Expected mu: np.log(mean) - (sigma**2 / 2) + exp_mu = 2.2046926 + assert_almost_equal(mu, exp_mu) + + def test_fit_mle(self): + + + + + + + + class TestExpon(TestCase): pass From b079f19bf07f49a460ca85d2ca7d6ee32368deab Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sun, 13 Apr 2014 09:14:37 -0700 Subject: [PATCH 244/343] Added lognormal distribution --- macroeco/models/_distributions.py | 41 +++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 9a15fa9..d8b9d44 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -780,8 +780,11 @@ def translate_args(self, mu, b): @inherit_docstring_from(rv_continuous_meco) def fit_mle(self, data, b=None): """%(super)s - In addition to data, requires ``b``, the upper limit of the - distribution. 
+ + Additional Parameters + ---------------------- + b : float + The upper limit of the distribution """ if not b: b = np.sum(data) @@ -839,10 +842,28 @@ def translate_args(self, mean, sigma): return np.log(mean) - (sigma ** 2 / 2), sigma @inherit_docstring_from(rv_continuous_meco) - def fit_mle(self, data, mean=None): + def fit_mle(self, data, fix_mean=False): + """%(super)s + +Additional Parameters +---------------------- +fix_mean : bool + Default False. If True, fixes mean before optimizing sigma + + """ - sigma, _, scale = stats.lognorm.fit(data, floc=0) - return np.log(scale), sigma + if not fix_mean: + sigma, _, scale = stats.lognorm.fit(data, floc=0) + return np.log(scale), sigma + + else: + mean = np.mean(data) + sigma = optim.fmin(mle, np.array([np.std(np.log(data), ddof=1)]), + args=(data, mean), disp=0)[0] + return self.translate_args(mean, sigma) + + def _argcheck(self, mu, sigma): + return True def _rvs(self, mu, sigma): return stats.lognorm.rvs(sigma, scale=np.exp(mu)) @@ -859,6 +880,16 @@ def _stats(self, mu, sigma): lognorm = lognorm_gen(name="lognorm", shapes="mu, sigma") +def tpdf(x, mean, sigma): + # Lognorm pmf with mean for optimization + mu, sigma = lognorm.translate_args(mean, sigma) + return lognorm.logpdf(x, mu, sigma) + + +def mle(sigma, x, mean): + # MLE function for lognormal + return -1 * np.sum(tpdf(x, mean, sigma)) + def _solve_k_from_mu(data, k_range, nll, *args): """ For given args, return k_agg from searching some k_range. From 9aaf439ca0528b090b9715ef701c1cee5d1ce4ae Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sun, 13 Apr 2014 09:14:51 -0700 Subject: [PATCH 245/343] Unittested lognormal distribution --- macroeco/models/test_distributions.py | 33 +++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index 7e12b7f..41a92b1 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -290,6 +290,11 @@ def test_pmf(self): test1 = lognorm.pdf(np.arange(1, 11), 2, 2) assert_array_almost_equal(test1, r_output, decimal=4) + # R pmf: dlnorm(5, -3, 5) + r_ans = 0.0104333 + test2 = lognorm.pdf(5, -3, 5) + assert_almost_equal(test2, r_ans) + def test_cdf(self): # R cdf: plnorm(c(1,1,4,5,12), 1.2, 3.45) r_output = np.array([0.3639854, 0.3639854, 0.5215318, 0.5472346, @@ -308,13 +313,41 @@ def test_translate_args(self): assert_almost_equal(mu, exp_mu) def test_fit_mle(self): + ''' + # R code + pmf <- function(x, N, S, sigma){ + mu = log(N / S) - (sigma^2 / 2) + dlnorm(x, meanlog=mu, sdlog=sigma) + } + + mle <- function(sdlog, x, N, S){ + -sum(log(pmf(x, N, S, sdlog))) + } + params <- function(x){ + N = sum(x); + S = length(x); + optimize(mle, interval=c(0,5), x, N, S) + } + data = # some data + params(data)''' + data1 = [1, 1, 1, 1, 1, 2, 2, 3, 3, 4, 5, 6, 123, 456] + data2 = [2, 2, 2, 4, 67, 34, 152, 9] + r_fits = [2.07598, 1.59213] # data1, data2 + testfit1 = lognorm.fit_mle(data1, fix_mean=True)[1] + testfit2 = lognorm.fit_mle(data2, fix_mean=True)[1] + assert_almost_equal(r_fits[0], testfit1, decimal=5) + assert_almost_equal(r_fits[1], testfit2, decimal=5) + # Scipy code: stats.lognorm.fit(data1, floc=0) + scipy_ans = 1.79518287 + test1 = lognorm.fit_mle(data1)[1] + assert_almost_equal(scipy_ans, test1) class TestExpon(TestCase): pass From e58863f54c9f0b50c88a57e20ea8bef111e43843 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Mon, 14 Apr 2014 16:17:35 -0700 Subject: [PATCH 246/343] Refactor and test empirical o-ring --- 
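[Editor's note] The lognormal support added in the three preceding patches (translate_args, the fix_mean option in fit_mle, and its unit tests) rests on one identity: for a lognormal, E[X] = exp(mu + sigma**2 / 2), so pinning the mean at mean(data) ties mu to sigma and leaves a one-dimensional likelihood in sigma. The sketch below is an editorial illustration only, not part of any commit; the helper name and example data are hypothetical, and the simplex search mirrors the approach in the patch without reproducing it exactly::

    import numpy as np
    from scipy import stats, optimize

    def fit_lognorm_fixed_mean(data):
        """Sketch: ML sigma with the lognormal mean pinned to mean(data)."""
        data = np.asarray(data, dtype=float)
        mean = data.mean()

        def nll(sigma):
            sigma = np.abs(sigma)  # guard against negative simplex trials
            # mu implied by the fixed mean: E[X] = exp(mu + sigma**2 / 2)
            mu = np.log(mean) - sigma ** 2 / 2.0
            return -np.sum(stats.lognorm.logpdf(data, sigma, scale=np.exp(mu)))

        sigma0 = np.std(np.log(data), ddof=1)           # log-moment starting value
        sigma = np.abs(optimize.fmin(nll, sigma0, disp=0)[0])
        return np.log(mean) - sigma ** 2 / 2.0, sigma   # (mu, sigma)

For the first data set quoted in the unit tests above, this minimizes the same objective as the R optimize() call shown there, so it should land close to the quoted sigma of roughly 2.08.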
macroeco/empirical/__init__.py | 2 +- macroeco/empirical/_empirical.py | 264 +++++++++++---------------- macroeco/empirical/test_empirical.py | 126 +++++-------- 3 files changed, 156 insertions(+), 236 deletions(-) diff --git a/macroeco/empirical/__init__.py b/macroeco/empirical/__init__.py index c899a92..457b776 100644 --- a/macroeco/empirical/__init__.py +++ b/macroeco/empirical/__init__.py @@ -38,5 +38,5 @@ """ from ._empirical import (Patch, - sad, ssad, sar, comm_grid, + sad, ssad, sar, comm_grid, o_ring, empirical_cdf) diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py index 6e5b026..399272a 100644 --- a/macroeco/empirical/_empirical.py +++ b/macroeco/empirical/_empirical.py @@ -654,205 +654,159 @@ def _yield_spatial_table(patch, div, spp_col, count_col, x_col, y_col): return df - - -def o_ring(self, div_cols, bin_edges, criteria, n0_min_max=None, - edge_correct=False, density=False): - ''' - Calculates univariate O-ring for a species. +@log_start_end +@doc_sub(metric_params, metric_return, cols_note, splits_note) +def o_ring(patch, cols, splits, spp, bin_edges, density=True): + """ + Calculates univariate O-ring for a species Parameters ---------- - div_cols : tuple - Column names containing x and y coordinates of individuals + {0} bin_edges : iterable List of edges of distance classes to bin histogram of distances - criteria : dict - See docstring for Patch.sad. Count column must be used. - n0_min_max : tuple - Optional min and max abundance for species to consider. Useful for - ignoring rare species with few samples and abundant species for - which calculation would take a long time. - edge_correct : bool - Correct histograms by replacing count of individuals at distance - bin with expected count if entire ring at that distance was - available (part of ring may fall outside of plot). Default False. + spp : str + String corresponding to focal species code density : bool If True, return densities (counts divided by area of torus defined - by bin edges) instead of counts. Default False. + by bin edges) instead of counts. Default True. Returns ------- - result : tuple - List of tuples with three elements each. First is combination used - to generate results, second is spp_list for that combination - (includes all species in entire landscape), and third is list of - length spp_list giving histogram of pairwise distances for each - species. + {1} Result has two columns, x and y, that give the distance to the center + of a torus and the number or density of individuals (possibly edge + corrected) found in that torus. Notes ----- - Pairwise distances are directional, giving n(n-1) total distances, as - edge correction is directional. - - If there are no records in a combination, histogram will be None. If - there are records but a species has only one individual, histogram - will be all zeros. - - When using edge_correct or density, the maximum distance used for edge - correction, given by the mean of the last two bin_edge values, should - be no greater than one half the longer dimension of the plot. This - ensures that it is not possible for an entire edge correction buffer - to be outside of the plot, which could lead to divide by zero errors. + If density is False, raw counts of individuals within a distance range, + without any edge correction, are returned. - ''' + Pairwise distances are directional, giving n(n-1) total distances for a + species with n individuals, as edge correction is inherently directional. 
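[Editor's note] The core of the refactored o_ring below is the directional pairwise-distance histogram described in the paragraph above. The following standalone sketch is an editorial illustration of that bookkeeping only (hypothetical helper and data; no splits, no edge correction, and co-located individuals handled with the straightforward n*(n-1) convention rather than the patch's exact code path)::

    import numpy as np
    from scipy.spatial.distance import cdist

    def oring_counts(points, counts, bin_edges):
        """Sketch: directional pairwise-distance histogram for one species."""
        points = np.asarray(points, dtype=float)
        counts = np.asarray(counts)
        hist = np.zeros(len(bin_edges) - 1)
        for i, (pt, ct) in enumerate(zip(points, counts)):
            others = np.delete(points, i, axis=0)
            other_cts = np.delete(counts, i)
            if len(others):
                d = cdist(pt[None, :], others).ravel()
                d = np.repeat(d, other_cts)   # weight targets by their counts
                d = np.tile(d, ct)            # weight this source by its count
            else:
                d = np.array([])
            # zero distances among the ct individuals stacked on this point
            d = np.concatenate([d, np.zeros(ct * (ct - 1))])
            hist += np.histogram(d, bin_edges)[0]
        return hist   # entries total n * (n - 1), with n = counts.sum()

For example, oring_counts([(0.0, 0.0), (0.1, 0.0)], [2, 1], [0, 0.05, 0.15]) gives [2., 4.], i.e. six directional pairs for three individuals.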
- spp_list, spp_col, count_col, engy_col, mass, combinations = \ - self.parse_criteria(criteria) + If there are no records for a species in a split, entire result table will + be a dataframe with no records. If there are records but a species has only + one individual, dataframe will have zero count at all torus areas. - bin_edges = np.array(bin_edges) + When using density, the maximum distance used for edge correction, given by + the mean of the last two bin_edge values, should be set to no greater than + one half the diagonal distance across the plot. This ensures that it is not + possible for an entire edge correction buffer to be outside of the plot, + which could lead to divide by zero errors. - result_list = [] + {2} - for comb in combinations: + For the 0-ring analysis, cols must also contain x_col and y_col, giving the + x and y dimensions along which to analyze spatial pattern. - # If comb includes division, cannot also use edge correction - # This would require better parsing of plot boundaries for division - if (not comb.keys() == []) and edge_correct: - raise NotImplementedError("Edge correction cannot be used " - "with combinations.") + {3} - # Get appropriate subtable for this combination - subtable = self.data_table.get_subtable(comb) + """ - # Declare empty list for all histograms for all species - spp_hist_list = [] + (spp_col, count_col, x_col, y_col), patch = \ + _get_cols(['spp_col', 'count_col', 'x_col', 'y_col'], cols, patch) - # If density is True, set edge_correct to True - if density: - edge_correct = True + # Loop through each split + result_list = [] + for substring, subpatch in _yield_subpatches(patch, splits): - # Set up plot polygon for edge correction - if edge_correct: - xmin = self.data_table.meta[(div_cols[0], 'minimum')] - xmax = self.data_table.meta[(div_cols[0], 'maximum')] - ymin = self.data_table.meta[(div_cols[1], 'minimum')] - ymax = self.data_table.meta[(div_cols[1], 'maximum')] + # Get table for just this species + spp_table = subpatch.table[subpatch.table[spp_col] == spp] - plot = geo.box(xmin, ymin, xmax, ymax) + # If spp not present or singleton, continue + if (len(spp_table) == 0): + result_list.append((substring, pd.DataFrame(columns=['x','y']))) + continue - all_r = (bin_edges[:-1] + bin_edges[1:]) / 2 + # Set up plot geometry + plot_poly, radii, torus_areas = \ + _get_plot_geometry(subpatch, bin_edges, x_col, y_col) - # Calculate areas of all toruses - if density: - ring_areas = [] - for i in range(len(bin_edges) - 1): - ring_areas.append(np.pi*(bin_edges[i+1]**2 - - bin_edges[i]**2)) - ring_areas = np.array(ring_areas) + # Get lists of all points and counts in spp_table + x = spp_table[x_col] + y = spp_table[y_col] + points = zip(x,y) + counts = list(spp_table[count_col]) - # Loop all species - for spp in spp_list: + # Arrays to hold summed areas and distance histograms for all points + areas = np.zeros(len(radii)) + hists = np.zeros(len(radii)) - spp_subtable = subtable[subtable[spp_col] == spp] + # Go through each point and associated count + for i, (point, count) in enumerate(zip(points, counts)): - # If spp not present or singleton, continue - # Ensure that if single record but count > 1, do analysis - if len(spp_subtable) == 0: - spp_hist_list.append(None) - continue + # Create list of all other points and counts except this + other_points = points[0:i] + points[i+1:] + other_counts = counts[0:i] + counts[i+1:] - # Get n0, accounting for count col - if count_col: - count = np.sum(spp_subtable[count_col]) + # Get dist from this point 
to all other points + if other_points: + other_dists = dist.cdist(np.array([point]), + np.array(other_points)) else: - count = len(spp_subtable) + other_dists = np.array(()) - # Skip this spp if there is a min_max set and n0 out of range - if n0_min_max and (count < n0_min_max[0] or count > - n0_min_max[1]): - spp_hist_list.append(None) - continue + # Repeat other point distances to acccount for their counts + other_dists = np.repeat(other_dists, other_counts) - # Get list of all points and all counts - x = spp_subtable[div_cols[0]] - y = spp_subtable[div_cols[1]] - all_points = zip(x,y) - all_counts = list(spp_subtable[count_col]) + # Repeat entire other_dist array to account for count here + other_dists = np.tile(other_dists, count) - # Declare array to hold histogram of pairwise distances - all_hist = np.zeros(len(bin_edges) - 1) + # Add 0's for count at this point to account for count here + # Multiplied by two to get directional pairwise dists + n_others_here = count - 1 + if n_others_here > 0: + other_dists = np.concatenate((other_dists, + np.zeros(n_others_here*2))) - # Declare array to hold all sampled areas per bin + # Calculate histogram of distances to other points + hist, _ = np.histogram(other_dists, bin_edges) + + # Convert histogram to density if desired + corr_factor = np.ones(len(radii)) # Frac length in plot if density: - all_areas = np.zeros(len(ring_areas)) + for i, r in enumerate(radii): + circ = geo.Point(*point).buffer(r, resolution=64) + outside_len = circ.boundary.difference(plot_poly).length + corr_factor[i] = ((circ.boundary.length - outside_len) / + circ.boundary.length) + hist = hist / corr_factor # Edge corrected if density, else same + + hists += hist + areas += (torus_areas * corr_factor) # Only used later if density - # Go through all_points - for i, this_point in enumerate(all_points): + # If density, divide summed torus counts by summed areas + if density: + hists = hists / areas - # Get this point and remove from list of all points - this_count = all_counts[i] + # Append subset result + subresult = pd.DataFrame({'x': radii, 'y': hists}) + result_list.append((substring, subresult)) + + # Return all results + return result_list - # Create list of all other points and counts except this - all_other_points = all_points[0:i] + all_points[i+1:] - all_other_counts = all_counts[0:i] + all_counts[i+1:] - # Get dist from this point to all other points - # If no other points, other_dist is empty - # May still be other individs at this point - if all_other_points: - other_dist = dist.cdist(np.array([this_point]), - np.array(all_other_points)) - else: - other_dist = np.array(()) - - # Repeat other point distances to acccount for their counts - other_dist = np.repeat(other_dist, all_other_counts) - - # Repeat entire other_dist to account for count here - other_dist = np.tile(other_dist, this_count) - - # Add 0 distances between individs at this point - # Multiplied by two to get directional pairwise dists - n_this_dists = this_count - 1 - if n_this_dists > 0: - other_dist = np.concatenate((other_dist, - np.zeros(n_this_dists*2))) - - # Calculate histogram of distances to other points - hist, _ = np.histogram(other_dist, bin_edges) - - # Edge correct distance - if edge_correct: - corr_fact = np.zeros(len(all_r)) - for i, r in enumerate(all_r): - x, y = this_point - circ = geo.Point(x,y).buffer(r,resolution=64) - out_len = circ.boundary.difference(plot).length - in_frac = ((circ.boundary.length - out_len) / - circ.boundary.length) - corr_fact[i] = in_frac - hist = hist 
/ corr_fact - - # Store sampled area at each dist for density calculation - if density: - all_areas += (ring_areas * corr_fact) - - # Add this point results to main histogram - all_hist += hist - - # If density, divide all values by summed sampled torus areas - if density: - all_hist = all_hist / all_areas +def _get_plot_geometry(subpatch, bin_edges, x_col, y_col): - # Append final hist for this species to running list - spp_hist_list.append(all_hist) + # Plot polygon + xmin = eval(subpatch.meta[x_col]['min']) + xmax = eval(subpatch.meta[x_col]['max']) + ymin = eval(subpatch.meta[y_col]['min']) + ymax = eval(subpatch.meta[y_col]['max']) + plot_poly = geo.box(xmin, ymin, xmax, ymax) - # For this comb, create and append tuple to result list - result_list.append((comb, spp_list, spp_hist_list)) + # Radii of toruses + bin_edges = np.array(bin_edges) + radii = (bin_edges[:-1] + bin_edges[1:]) / 2 - return result_list + # Areas of all toruses + torus_areas = [] + for i in range(len(bin_edges) - 1): + torus_areas.append(np.pi * (bin_edges[i+1]**2 - bin_edges[i]**2)) + return plot_poly, radii, np.array(torus_areas) diff --git a/macroeco/empirical/test_empirical.py b/macroeco/empirical/test_empirical.py index 45ab26f..610175c 100644 --- a/macroeco/empirical/test_empirical.py +++ b/macroeco/empirical/test_empirical.py @@ -197,6 +197,52 @@ def test_x_y_division_uneven_y_jaccard(self): comm = emp.comm_grid(self.pat1, self.cols1, '', '2,2',metric='Jaccard') assert_equal(comm[0][1]['y'], [1/2., 0, 0, 0, 1/2., 0]) +class TestORing(Patches): + # TODO: Individuals falling directly on a radius may be allocated + # ambiguously between adjacent toruses + + # TODO: Main may fail with error if dataframe has no records when trying to + # fit or make plot. + + def test_missing_spp_returns_df_with_no_records(self): + o_ring = emp.o_ring(self.pat1, self.cols1, '', 'nothere', [0,.11,.2]) + assert_frame_equal(o_ring[0][1], pd.DataFrame(columns=['x','y'])) + + def test_one_individual_returns_zeros(self): + self.pat1.table = self.pat1.table[2:4] # Leave 1 'a' and 1 'b' + o_ring = emp.o_ring(self.pat1, self.cols1, '', 'a', [0,.11,.2]) + assert_equal(o_ring[0][1]['y'], [0, 0]) + + def test_simple_count_no_density_a(self): + o_ring = emp.o_ring(self.pat1, self.cols1, '', 'a', [0,.11,.2], + density=False) + assert_almost_equal(o_ring[0][1]['x'], [0.055, 0.155]) + assert_almost_equal(o_ring[0][1]['y'], [8, 4]) + + def test_simple_count_no_density_b(self): + o_ring = emp.o_ring(self.pat1, self.cols1, '', 'b', [0,.11,.2], + density=False) + assert_almost_equal(o_ring[0][1]['x'], [0.055, 0.155]) + assert_almost_equal(o_ring[0][1]['y'], [2, 4]) + + def test_simple_count_with_split_a(self): + o_ring = emp.o_ring(self.pat1, self.cols1, 'y:2', 'a', [0,.11,.2], + density=False) + assert_equal(o_ring[0][1]['y'], [2, 0]) # Bottom + assert_equal(o_ring[1][1]['y'], [2, 0]) # Top + + def test_simple_count_with_split_b(self): + o_ring = emp.o_ring(self.pat1, self.cols1, 'y:2', 'b', [0,.11,.2], + density=False) + assert_equal(o_ring[0][1]['y'], []) # Bottom + assert_equal(o_ring[1][1]['y'], [2, 4]) # Top + + def test_density_a(self): + o_ring = emp.o_ring(self.pat1, self.cols1, '', 'b', [0,.05,.1]) + assert_array_almost_equal(o_ring[0][1]['y'], [1358.12218105,0]) + + # TODO: More checks of density (which inclues edge correction) + class TestProduct(): @@ -243,83 +289,3 @@ def test_all_data_same(self): res = emp.empirical_cdf(test_data) assert_array_equal(ans, res['ecdf']) - - - -# def test_comm_sep(self): - -# # Create result recarray -# 
comm = self.pat9.comm_sep({'plot1': (0,0), 'plot2': (0,1), -# 'plot3': (3,4)}, -# {'spp_code': 'species', 'count': 'count'}) - -# # Create result recarray with dec degree locs -# comm_decdeg = self.pat9.comm_sep({'plot1': (9.1,79.0), -# 'plot2': (9.2,79.5), 'plot3': (12.7,50)}, -# {'spp_code': 'species', 'count': 'count'}, -# loc_unit='decdeg') - -# # Check distances -# dist_sort = np.sort(comm['dist']) -# np.testing.assert_array_almost_equal(dist_sort, np.array((1,4.242,5)), -# 3) - -# # Check distances dec degree -# # TODO: Find exact third party comparison formula - formulas online use -# # different radii, etc. and give approx same answer -# dist_sort = np.sort(comm_decdeg['dist']) -# #np.testing.assert_array_almost_equal(dist_sort, -# # np.array((56.058,3193.507, -# # 3245.820)), 3) - -# # Check species in each plot -# spp_sort = np.sort(np.array(list(comm['spp-a']) + list(comm['spp-b']))) -# np.testing.assert_array_equal(spp_sort, np.array((0,0,3,3,4,4))) - -# # Check Sorensen - 2 zeros from empty plot1 -# sor_sort = np.sort(comm['sorensen']) -# np.testing.assert_array_almost_equal(sor_sort, -# np.array((0,0,0.571428571)), 5) - -# # Check Jaccard - 2 zeros from empty plot1 -# jac_sort = np.sort(comm['jaccard']) -# np.testing.assert_array_almost_equal(jac_sort, np.array((0,0,0.4)), 5) - -# def test_o_ring(self): - -# # Check standard case, no min max, no edge correction, no criteria -# # Tests that distances and repeats for count col are correct -# result_list = self.pat1.o_ring(('x','y'), [0,.11,.2], -# {'spp_code': 'species', 'count': 'count'}) - -# np.testing.assert_array_equal(result_list[0][2][0], np.array((8,4))) -# np.testing.assert_array_equal(result_list[0][2][1], np.array((2,4))) - -# # Check standard case, no min max, no edge correction, with division -# result_list = self.pat1.o_ring(('x','y'), [0,.11,.2], -# {'spp_code': 'species', 'count': 'count', -# 'y': 2}) - -# # - First half of y, both species -# np.testing.assert_array_equal(result_list[0][2][0], np.array((6,0))) -# np.testing.assert_array_equal(result_list[0][2][1], np.array((0,0))) - -# # - Second half of y, both species -# np.testing.assert_array_equal(result_list[1][2][0], np.array((0,0))) -# np.testing.assert_array_equal(result_list[1][2][1], np.array((2,0))) - -# # Check edge correction - check only first species -# # Almost equal required due to float division -# result_list = self.pat1.o_ring(('x','y'), [0,.05,.1], -# {'spp_code': 'species', 'count': 'count'}, -# edge_correct=True) -# np.testing.assert_array_almost_equal(result_list[0][2][0], -# np.array((8,18))) - -# # Check density - check only second species -# print 'here ' -# result_list = self.pat1.o_ring(('x','y'), [0,.05,.1], -# {'spp_code': 'species', 'count': 'count'}, -# density=True) -# np.testing.assert_array_almost_equal(result_list[0][2][1], -# np.array((1358.12218105,0))) From 689d8258b209da74e85d891735bc0302ecba000d Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 15 Apr 2014 00:30:12 -0700 Subject: [PATCH 247/343] added plnorm and plnorm_lowtrunc --- macroeco/models/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py index 7e1d039..42e550d 100644 --- a/macroeco/models/__init__.py +++ b/macroeco/models/__init__.py @@ -37,8 +37,8 @@ """ from _distributions import (geom, geom_uptrunc, nbinom, cnbinom, - logser_uptrunc, expon, expon_uptrunc, - lognorm) + logser_uptrunc, plnorm, plnorm_lowtrunc, + expon, expon_uptrunc, lognorm) from ._curves import 
(power_law, mete_sar, mete_iterative_sar, From d2824f43ee32f0ea358c00d29e988f5005891934 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 15 Apr 2014 00:30:47 -0700 Subject: [PATCH 248/343] Moved solve_k fxn --- macroeco/models/_distributions.py | 32 +++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index d8b9d44..df478d2 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -552,6 +552,38 @@ def _ln_choose(n, k_agg): return gammaln(n + 1) - (gammaln(k_agg + 1) + gammaln(n - k_agg + 1)) +def _solve_k_from_mu(data, k_range, nll, *args): + """ + For given args, return k_agg from searching some k_range. + + Parameters + ---------- + data : array + k_range : array + nll : function + + args : + + Returns + -------- + :float + Minimum k_agg + + """ + # TODO: See if a root finder like fminbound would work with Decimal used in + # logpmf method (will this work with arrays?) + + k_array = np.arange(*k_range) + nll_array = np.zeros(len(k_array)) + + for i in range(len(k_array)): + nll_array[i] = nll(data, k_array[i], *args) + + min_nll_idx = np.argmin(nll_array) + + return k_array[min_nll_idx] + + class logser_uptrunc_gen(rv_discrete_meco): r""" Upper truncated logseries random variable. From 113e136ee5200aac76c3ca3c10eb805f47afdc3f Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 15 Apr 2014 00:31:47 -0700 Subject: [PATCH 249/343] Generalized mean and variance and moved solve_k --- macroeco/models/_distributions.py | 70 +++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 23 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index df478d2..7987c3a 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -679,10 +679,11 @@ def _rvs(self, p, b): return np.array(out) def _stats(self, p, b): + vals = np.arange(1, b + 1) - mu = np.sum(vals * self.pmf(vals, p, b)) - var = np.sum(vals ** 2 * self.pmf(vals, p, b)) - mu ** 2 - return mu, var, None, None + full_pmf = self.pmf(vals, p, b) + mean, var = mean_var(vals, full_pmf) + return mean, var, None, None logser_uptrunc = logser_uptrunc_gen(name="logser_uptrunc", shapes="p, b") @@ -922,33 +923,56 @@ def mle(sigma, x, mean): # MLE function for lognormal return -1 * np.sum(tpdf(x, mean, sigma)) -def _solve_k_from_mu(data, k_range, nll, *args): - """ - For given args, return k_agg from searching some k_range. + + +def mean_var(vals, pmf): + # Calculates the mean and variance from vals and pmf + + mean = np.sum(vals * pmf) + var = np.sum(vals ** 2 * pmf) - mean ** 2 + return mean, var + + +def make_rank(pmf, n, min_supp=1): + ''' + Convert any pmf into a rank curve for S species using cumulative + distribution function. Parameters ---------- - data : array - k_range : array - nll : function - - args : + pmf : ndarray + Probability of observing a species from 1 to length pmf individs. + n : int + Total number of samples + min_supp : int + The minimum support of the distribution. Often either 1 or 0. Returns - -------- - :float - Minimum k_agg + ------- + ndarray + 1D array of predicted ranks - """ - # TODO: See if a root finder like fminbound would work with Decimal used in - # logpmf method (will this work with arrays?) + Notes + ----- + Function actually implements (philosophically) a step quantile function. 
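[Editor's note] The step-quantile idea mentioned in the line above can be stated compactly with a cumulative sum and searchsorted; the sketch below is an editorial aside (hypothetical helper, not part of the commit) and matches the loop that follows in this diff up to tie-breaking when a quantile point falls exactly on a cdf value::

    import numpy as np

    def rank_from_pmf(pmf, n, min_supp=1):
        """Sketch: abundances of n ranked species as step quantiles."""
        pmf = np.asarray(pmf, dtype=float)
        cdf = np.cumsum(pmf / pmf.sum())
        # midpoint quantiles (2i - 1) / (2n) for i = 1..n
        points = (2.0 * np.arange(1, n + 1) - 1) / (2.0 * n)
        # smallest abundance whose cdf reaches each quantile point
        return np.searchsorted(cdf, points) + min_supp

    # e.g. rank_from_pmf(0.5 ** np.arange(1, 200), 10) returns the ten
    # predicted abundances, rarest first, for a geometric-like pmf.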
+ Use if ppf in rv_discrete_meco is too slow - k_array = np.arange(*k_range) - nll_array = np.zeros(len(k_array)) + ''' - for i in range(len(k_array)): - nll_array[i] = nll(data, k_array[i], *args) + pmf = pmf / np.sum(pmf) # Ensure distribution is normalized - min_nll_idx = np.argmin(nll_array) + points = np.arange(1 / (2 * n), 1, 1 / n) + counts = np.zeros(n) - return k_array[min_nll_idx] + if min_supp == 1: + pmf = np.array([0] + list(pmf)) # Add 0 to start of pmf + cum_pmf = np.cumsum(pmf) + + for cutoff in cum_pmf: + greater_thans = (points >= cutoff) + counts[greater_thans] += 1 + + if not greater_thans.any(): # If no greater thans, done with samples + break + + return counts From 8707310a63611e6c60394fa030b7f09d9d13aad2 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 15 Apr 2014 00:32:25 -0700 Subject: [PATCH 250/343] Added plnorm and plnorm_lowtrunc to _distributions --- macroeco/models/_distributions.py | 224 ++++++++++++++++++++++++++++++ 1 file changed, 224 insertions(+) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 7987c3a..8fc277b 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -10,6 +10,7 @@ import scipy.stats as stats import scipy.optimize as optim import scipy.special as special +import scipy.integrate as integrate from ..misc import doc_sub, inherit_docstring_from @@ -724,10 +725,233 @@ def _trunc_logser_solver(bins, b): xtol=1.490116e-08, maxiter=1000) return p + +class plnorm_gen(rv_discrete_meco): + r""" + Poisson lognormal random variable + + Adapted from Bulmer (1974) [#]_ + + Methods + ------- + translate_args(mean, sigma) + not implemented + fit_mle(data) + ml estimate of shape parameters mu and sigma + %(before_notes)s + mu : float + mu parameter of the poisson lognormal + sigma : float + sigma parameter of the poisson lognormal + + Notes + ----- + The pmf method was adopted directly from the VGAM package in R. + The VGAM R package was adopted directly from Bulmer (1974). + + The fit_mle function was adapted from Ethan White's pln_solver function in + macroeco_distributions (https://github.com/weecology/macroecotools) + + References + ---------- + .. [#] + + Bulmer, M. G. (1974). On fitting the poisson lognormal distribution to + species bundance data. Biometrics, 30, 101-110. + + """ + + @inherit_docstring_from(rv_discrete_meco) + def translate_args(self, mean, sigma): + raise NotImplementedError("Translate args not implemented") + + @inherit_docstring_from(rv_discrete_meco) + def fit_mle(self, data): + + mu0 = np.mean(np.log(data)) + sig0 = np.std(np.log(data)) + + def mle(params): + return -np.sum(self.logpmf(data, params[0], params[1])) + + # Bounded fmin? + mu, sigma = optim.fmin(mle, x0=[mu0, sig0], disp=0) + + return mu, sigma + + @inherit_docstring_from(rv_discrete_meco) + def rank(self, n, mu, sigma, upper=100000): + """%(super)s + Uses approximation of rank distribution. Increasing ``upper`` will + give a closer approximation. 
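[Editor's note] A quick way to judge whether a given ``upper`` is large enough is to check how much probability mass the truncated support actually captures. This is an editorial sketch with hypothetical parameter values; it assumes only that the ``plnorm`` object defined in this patch behaves like a standard scipy discrete distribution and is importable once the accompanying __init__ change is applied::

    import numpy as np
    from macroeco.models import plnorm

    mu, sigma, upper = 2.0, 1.5, 100000
    mass = plnorm.pmf(np.arange(upper + 1), mu, sigma).sum()
    # mass should be very close to 1; if it is not, the rank approximation
    # is dropping tail species and `upper` should be increased.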
+ """ + + return make_rank(self.pmf(np.arange(upper + 1), mu, sigma), n, + min_supp=0) + + def _argcheck(self, mu, sigma): + return True + + def _pmf(self, x, mu, sigma, approx_cut=10): + + x = np.array(x) + pmf = np.empty(len(x), dtype=np.float) + xbelow = x <= approx_cut + xabove = x > approx_cut + + # If below, use exact answer + if np.sum(xbelow) > 0: + + pmf[xbelow] = plognorm_intg_vec(x[xbelow], mu[xbelow], + sigma[xbelow]) + + # If above, use approximation + if np.sum(xabove) > 0: + + z = (np.log(x[xabove]) - mu[xabove]) / sigma[xabove] + + pmf_above = ((1 + (z**2 + np.log(x[xabove]) - mu[xabove] - 1) / + (2 * x[xabove] * sigma[xabove]**2)) * np.exp(-0.5 * z**2) / + (np.sqrt(2 * np.pi) * sigma[xabove] * x[xabove])) + + pmf[xabove] = pmf_above + + return pmf + + def _cdf(self, x, mu, sigma, approx_cut=10): + + mu = np.atleast_1d(mu) + sigma = np.atleast_1d(sigma) + x = np.atleast_1d(x) + + max_x = np.max(x) + pmf_list = self.pmf(np.arange(np.int(max_x) + 1), mu[0], sigma[0]) + full_cdf = np.cumsum(pmf_list) + + cdf = np.array([full_cdf[tx] for tx in x]) + + return cdf + + def _stats(self, mu, sigma, upper=100000): + # TODO: stats doesn't like the upper argument + + vals = np.arange(0, upper + 1) + full_pmf = self.pmf(vals, mu, sigma) + + mean, var = mean_var(vals, full_pmf) + + return mean, var, None, None + +plnorm = plnorm_gen(name="plnorm", shapes="mu, sigma") + + +class plnorm_lowtrunc_gen(rv_discrete_meco): + """ + Zero-truncated poisson lognormal random variable + + Adapted from Bulmer (1974) [#]_ + + Methods + ------- + translate_args(mean, sigma) + not implemented + fit_mle(data) + ml estimate of shape parameters mu and sigma + %(before_notes)s + mu : float + mu parameter of the poisson lognormal + sigma : float + sigma parameter of the poisson lognormal + + Notes + ----- + The pmf method was adopted directly from the VGAM package in R. + The VGAM R package was adopted directly from Bulmer (1974). + + The fit_mle function was adapted from Ethan White's pln_solver function in + macroeco_distributions (https://github.com/weecology/macroecotools) + + References + ---------- + .. [#] + + Bulmer, M. G. (1974). On fitting the poisson lognormal distribution to + species bundance data. Biometrics, 30, 101-110. + + """ + + @inherit_docstring_from(rv_discrete_meco) + def translate_args(self, mean, sigma): + raise NotImplementedError("Translate args not implemented") + + @inherit_docstring_from(rv_discrete_meco) + def fit_mle(self, data): + + # Copying code...could we make this a generic function with an eval? + # Or would that slow it down too much? + mu0 = np.mean(np.log(data)) + sig0 = np.std(np.log(data)) + + def mle(params): + return -np.sum(np.log(self._pmf(data, params[0], params[1]))) + + # Bounded fmin? + mu, sigma = optim.fmin(mle, x0=[mu0, sig0], disp=0) + + return mu, sigma + + @inherit_docstring_from(rv_discrete_meco) + def rank(self, n, mu, sigma, upper=100000): + """%(super)s + Uses approximation of rank distribution. Increasing ``upper`` will + give a closer approximation. 
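[Editor's note] The lower-truncated methods that follow all reduce to the usual zero-truncation identity, P(X = x | X >= 1) = P(X = x) / (1 - P(X = 0)). A generic editorial sketch (hypothetical helper; works for any discrete pmf whose support starts at zero)::

    import numpy as np

    def zero_truncated_pmf(pmf_func, x, *args):
        """Sketch: renormalize a pmf after discarding the zero class."""
        x = np.asarray(x)
        p0 = pmf_func(0, *args)
        out = pmf_func(x, *args) / (1.0 - p0)
        return np.where(x >= 1, out, 0.0)

    # e.g. zero_truncated_pmf(plnorm.pmf, [1, 2, 3], 2.0, 1.5), using the
    # plnorm object defined above; the truncated pmf sums to 1 over x >= 1.

The cdf version that follows in this diff takes the same form, subtracting the zero-class mass from the cumulative distribution before renormalizing.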
+ """ + + return make_rank(self.pmf(np.arange(upper + 1), mu, sigma), n, + min_supp=1) + + def _argcheck(self, mu, sigma): + return True + + def _pmf(self, x, mu, sigma): + x = np.array(x) + return plnorm.pmf(x, mu, sigma) / (1 - plognorm_intg_vec(0, mu, sigma)) + + def _cdf(self, x, mu, sigma): + x = np.array(x) + return plnorm.cdf(x, mu, sigma) / (1 - plognorm_intg_vec(0, mu, sigma)) + + def _stats(self, mu, sigma, upper=100000): + + vals = np.arange(1, upper + 1) + full_pmf = self.pmf(vals, mu, sigma) + mean, var = mean_var(vals, full_pmf) + + return mean, var, None, None + + +plnorm_lowtrunc = plnorm_lowtrunc_gen(name="plnorm_lowtrunc", + shapes="mu, sigma") + +def plognorm_intg(x, mu, sigma): + # Integral for plognorm + eq = lambda t, x, mu, sigma: np.exp(t * x - np.exp(t) - 0.5 * + ((t - mu) / sigma) ** 2) + + intg = integrate.quad(eq, -np.inf, np.inf, args=(x, mu, sigma))[0] + + norm = np.exp(-0.5 * np.log(2 * np.pi * sigma ** 2) - + special.gammaln(x + 1)) + + return norm * intg + +plognorm_intg_vec = np.vectorize(plognorm_intg) + # # Continuous # + class expon_gen(rv_continuous_meco): r""" An exponential continuous random variable. From 5aff71c5591f39d7b841dc776d6031761cd5b4bf Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 16 Apr 2014 00:58:30 -0700 Subject: [PATCH 251/343] Fixed sphinx build errors --- macroeco/compare/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/macroeco/compare/__init__.py b/macroeco/compare/__init__.py index 0375f1e..9a82519 100644 --- a/macroeco/compare/__init__.py +++ b/macroeco/compare/__init__.py @@ -16,10 +16,10 @@ nll lrt AIC - AIC_weights + AIC_compare sum_of_squares r_squared - bin_data + preston_bin """ From ddfd904a6f8c5f55997946e3ae01bdcf146d8989 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 16 Apr 2014 00:58:53 -0700 Subject: [PATCH 252/343] Added plnorm and plnorm_lowtrunc to docs --- macroeco/models/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py index 42e550d..98f814d 100644 --- a/macroeco/models/__init__.py +++ b/macroeco/models/__init__.py @@ -20,6 +20,7 @@ expon expon_uptrunc + lognorm Discrete distributions ====================== @@ -32,7 +33,8 @@ nbinom cnbinom logser_uptrunc - lognorm + plnorm + plnorm_lowtrunc """ From 8dbb642ed7b8add9559ab635630cd44ee7c1964b Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 16 Apr 2014 01:00:07 -0700 Subject: [PATCH 253/343] Cleaned-up plnorm docs. Sped up plnorm_lowtrunc --- macroeco/models/_distributions.py | 44 +++++++++++++++++-------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 8fc277b..46285d4 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -728,9 +728,7 @@ def _trunc_logser_solver(bins, b): class plnorm_gen(rv_discrete_meco): r""" - Poisson lognormal random variable - - Adapted from Bulmer (1974) [#]_ + Poisson lognormal random variable. Methods ------- @@ -747,7 +745,7 @@ class plnorm_gen(rv_discrete_meco): Notes ----- The pmf method was adopted directly from the VGAM package in R. - The VGAM R package was adopted directly from Bulmer (1974). 
+ The VGAM R package was adopted directly from Bulmer (1974) [#]_ The fit_mle function was adapted from Ethan White's pln_solver function in macroeco_distributions (https://github.com/weecology/macroecotools) @@ -755,7 +753,6 @@ class plnorm_gen(rv_discrete_meco): References ---------- .. [#] - Bulmer, M. G. (1974). On fitting the poisson lognormal distribution to species bundance data. Biometrics, 30, 101-110. @@ -780,12 +777,13 @@ def mle(params): return mu, sigma @inherit_docstring_from(rv_discrete_meco) - def rank(self, n, mu, sigma, upper=100000): + def rank(self, n, mu, sigma, **kwds): """%(super)s - Uses approximation of rank distribution. Increasing ``upper`` will - give a closer approximation. - """ + Uses approximation of rank distribution. The keyword ``upper`` defines + the upper bound used in the approximation. Default is 100000. + """ + upper = kwds.get('upper', 100000) return make_rank(self.pmf(np.arange(upper + 1), mu, sigma), n, min_supp=0) @@ -833,7 +831,6 @@ def _cdf(self, x, mu, sigma, approx_cut=10): return cdf def _stats(self, mu, sigma, upper=100000): - # TODO: stats doesn't like the upper argument vals = np.arange(0, upper + 1) full_pmf = self.pmf(vals, mu, sigma) @@ -846,10 +843,8 @@ def _stats(self, mu, sigma, upper=100000): class plnorm_lowtrunc_gen(rv_discrete_meco): - """ - Zero-truncated poisson lognormal random variable - - Adapted from Bulmer (1974) [#]_ + r""" + Zero-truncated poisson lognormal random variable. Methods ------- @@ -866,7 +861,7 @@ class plnorm_lowtrunc_gen(rv_discrete_meco): Notes ----- The pmf method was adopted directly from the VGAM package in R. - The VGAM R package was adopted directly from Bulmer (1974). + The VGAM R package was adopted directly from Bulmer (1974) [#]_ The fit_mle function was adapted from Ethan White's pln_solver function in macroeco_distributions (https://github.com/weecology/macroecotools) @@ -874,7 +869,6 @@ class plnorm_lowtrunc_gen(rv_discrete_meco): References ---------- .. [#] - Bulmer, M. G. (1974). On fitting the poisson lognormal distribution to species bundance data. Biometrics, 30, 101-110. @@ -914,12 +908,21 @@ def _argcheck(self, mu, sigma): return True def _pmf(self, x, mu, sigma): - x = np.array(x) - return plnorm.pmf(x, mu, sigma) / (1 - plognorm_intg_vec(0, mu, sigma)) + + mu = np.atleast_1d(mu) + sigma = np.atleast_1d(sigma) + + norm = 1 - plognorm_intg_vec(0, mu[0], sigma[0]) + return plnorm.pmf(x, mu, sigma) / norm def _cdf(self, x, mu, sigma): - x = np.array(x) - return plnorm.cdf(x, mu, sigma) / (1 - plognorm_intg_vec(0, mu, sigma)) + + mu = np.atleast_1d(mu) + sigma = np.atleast_1d(sigma) + + norm = 1 - plognorm_intg_vec(0, mu[0], sigma[0]) + return (plnorm.cdf(x, mu, sigma) - + plnorm.cdf(0, mu[0], sigma[0])) / norm def _stats(self, mu, sigma, upper=100000): @@ -947,6 +950,7 @@ def plognorm_intg(x, mu, sigma): plognorm_intg_vec = np.vectorize(plognorm_intg) + # # Continuous # From f0536759883b00846a7f545293112fe8d23c9e62 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 16 Apr 2014 23:44:50 -0700 Subject: [PATCH 254/343] Switched to btgs solver to increase speed --- macroeco/models/_distributions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 46285d4..84a7fd7 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -772,7 +772,7 @@ def mle(params): return -np.sum(self.logpmf(data, params[0], params[1])) # Bounded fmin? 
- mu, sigma = optim.fmin(mle, x0=[mu0, sig0], disp=0) + mu, sigma = optim.fmin_bfgs(mle, x0=[mu0, sig0], disp=0) return mu, sigma @@ -890,7 +890,7 @@ def mle(params): return -np.sum(np.log(self._pmf(data, params[0], params[1]))) # Bounded fmin? - mu, sigma = optim.fmin(mle, x0=[mu0, sig0], disp=0) + mu, sigma = optim.fmin_bfgs(mle, x0=[mu0, sig0], disp=0) return mu, sigma From 014f31b3361ff0aa1291acdfb68460b6da5adb1a Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 16 Apr 2014 23:47:00 -0700 Subject: [PATCH 255/343] Handle cases when input or pmf is zero --- macroeco/models/_distributions.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 84a7fd7..6478a5a 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -814,6 +814,9 @@ def _pmf(self, x, mu, sigma, approx_cut=10): pmf[xabove] = pmf_above + # If pmf is 0 the likelihood might break + pmf[pmf == 0] = 1e-120 + return pmf def _cdf(self, x, mu, sigma, approx_cut=10): @@ -909,20 +912,28 @@ def _argcheck(self, mu, sigma): def _pmf(self, x, mu, sigma): + x = np.array(x) mu = np.atleast_1d(mu) sigma = np.atleast_1d(sigma) norm = 1 - plognorm_intg_vec(0, mu[0], sigma[0]) - return plnorm.pmf(x, mu, sigma) / norm + pmf_vals = plnorm.pmf(x, mu, sigma) / norm + pmf_vals[x < 1] = 0 + + return pmf_vals def _cdf(self, x, mu, sigma): + x = np.array(x) mu = np.atleast_1d(mu) sigma = np.atleast_1d(sigma) norm = 1 - plognorm_intg_vec(0, mu[0], sigma[0]) - return (plnorm.cdf(x, mu, sigma) - + cdf_vals = (plnorm.cdf(x, mu, sigma) - plnorm.cdf(0, mu[0], sigma[0])) / norm + cdf_vals[x < 1] = 0 + + return cdf_vals def _stats(self, mu, sigma, upper=100000): From fea710d5ea6e1a83558963c8b70064cd734cfc69 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 16 Apr 2014 23:47:22 -0700 Subject: [PATCH 256/343] Unit test plnorm and plnorm_lowtrunc --- macroeco/models/test_distributions.py | 88 +++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index 41a92b1..4c16d07 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -349,6 +349,94 @@ def test_fit_mle(self): test1 = lognorm.fit_mle(data1)[1] assert_almost_equal(scipy_ans, test1) + +class TestPlnorm(TestCase): + + def test_pmf(self): + + # Test against R VGAM fxn: dpolono(c(1:10), -1, 3) + r_res = [0.121392844, 0.057692006, 0.035586652, 0.024863530, + 0.018681089, 0.014721035, 0.011998072, 0.010027588, 0.008545518, + 0.007396607] + + test = plnorm.pmf(np.arange(1, 11), -1, 3) + assert_array_almost_equal(r_res, test) + + # Test against macroeco_distributions.pln: + # pln.pmf([0, 50, 1000], 2.34, 5, 0) + + md_res = np.array([2.86468926e-01, 1.51922299e-03, 5.25717609e-05]) + test = plnorm.pmf([0, 50, 1000], 2.34, 5) + assert_array_almost_equal(md_res, test) + + def test_cdf(self): + + # Test against R VGAM fxn: ppolono(c(0, 15, 10000), .1, 2) + r_res = [0.3954088, 0.9048902, 0.9999973] + test = plnorm.cdf([0, 15, 10000], .1, 2) + assert_array_almost_equal(r_res, test, decimal=5) + + # Test against macroeco_distributions: + # pln.cdf([1,2,3], 20, 4, 0) + + md_res = np.array([7.34761277e-07, 1.18860746e-06, 1.67083480e-06]) + test = plnorm.cdf([1, 2, 3], 20, 4) + assert_array_almost_equal(md_res, test, decimal=5) + + def test_fit_mle(self): + + # Test against R poilog: poilogMLE(data, zTrune=FALSE) + data = 
np.array([1,1,1,1,1,2,2,2,3,3,4,4,5,5,6,6,12,45,67]) + Rfits = (1.31928, 1.18775) + fits = plnorm.fit_mle(data) + assert_array_almost_equal(Rfits, fits, decimal=3) + + # Test against macroeco_distributions + # pln_solver(data, lower_trunc=False) + md_res = (1.3195580310886075, 1.1876019842774048) + assert_array_almost_equal(md_res, fits, decimal=4) + + def test_rank(self): + pass + + +class TestPlnormLowTrunc(TestCase): + + def test_pmf(self): + + # Test against macroeco_distributions: + # pln.pmf([0, 50, 1000], 2.34, 5, 1) + md_res = np.array([0, 2.12916164e-03, 7.36783061e-05]) + test = plnorm_lowtrunc.pmf([0, 50, 1000], 2.34, 5) + + assert_array_almost_equal(md_res, test) + + def test_cdf(self): + + # Test against dpolonorm + # ppolono(c(1,2,3), 4.3, 100) / (1 - ppolono(0, 4.3, 100)) + r_res = [0.007670365, 0.011507417, 0.014065948] + + test = plnorm_lowtrunc.cdf(np.arange(1, 4), 4.3, 100) + assert_array_almost_equal(r_res, test) + + def test_fit_mle(self): + + data = np.array([1,1,1,4,4,4,4,5,5,5,12,44,55,112]) + + # macroeco_distributions fit: pln_solver(data) + md_fits = (1.068510556981163, 1.8800439687956865) + test = plnorm_lowtrunc.fit_mle(data) + assert_array_almost_equal(test, md_fits, decimal=4) + + # R poilog: poilogMLE(data) + r_fits = (1.067620, 1.880646) + assert_array_almost_equal(test, r_fits, decimal=3) + + def test_rank(self): + pass + + class TestExpon(TestCase): pass From 030fa37bdcff945696719e6710fdfd38f5b28345 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 17 Apr 2014 08:24:17 -0700 Subject: [PATCH 257/343] Unit testing plnorm against Grundy values --- macroeco/models/test_distributions.py | 35 +++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index 4c16d07..a40ac5b 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -369,6 +369,40 @@ def test_pmf(self): test = plnorm.pmf([0, 50, 1000], 2.34, 5) assert_array_almost_equal(md_res, test) + # Unit test from test_macroeco_distributions + + # Test values for Poisson lognomal are chosen from Table 1 and Table 2 + # in Grundy Biometrika 38:427-434. + # In Table 1 the values are deducted from 1 which give p(0). 
+ pln_table1 = [[-2.0, 2, '0.9749'], + [-2.0, 8, '0.9022'], + [-2.0, 16, '0.8317'], + [0.5, 2, '0.1792'], + [0.5, 8, '0.2908'], + [0.5, 16, '0.3416'], + [3, 2, '0.0000'], + [3, 8, '0.0069'], + [3, 16, '0.0365']] + + pln_table2 = [[-2.0, 2, '0.0234'], + [-2.0, 8, '0.0538'], + [-2.0, 16, '0.0593'], + [0.5, 2, '0.1512'], + [0.5, 8, '0.1123'], + [0.5, 16, '0.0879'], + [3, 2, '0.0000'], + [3, 8, '0.0065'], + [3, 16, '0.0193']] + + for vals in pln_table1: + test = plnorm.pmf(0, np.log(10 ** vals[0]), vals[1] ** .5) + assert_almost_equal(test, float(vals[2]), decimal=4) + + for vals in pln_table2: + test = plnorm.pmf(1, np.log(10 ** vals[0]), vals[1] ** .5) + assert_almost_equal(test, float(vals[2]), decimal=4) + + def test_cdf(self): # Test against R VGAM fxn: ppolono(c(0, 15, 10000), .1, 2) @@ -396,6 +430,7 @@ def test_fit_mle(self): md_res = (1.3195580310886075, 1.1876019842774048) assert_array_almost_equal(md_res, fits, decimal=4) + def test_rank(self): pass From 3f52aff4938cf9cb22733ea0b99940a6796106d7 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 17 Apr 2014 10:19:29 -0700 Subject: [PATCH 258/343] Add subpackages to setup.py --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index af23c25..31490a4 100644 --- a/setup.py +++ b/setup.py @@ -9,6 +9,7 @@ description = 'Analysis of ecological patterns in Python', author = 'Justin Kitzes, Mark Wilber, Chloe Lewis', url = 'https://github.com/jkitzes/macroeco', - packages = ['macroeco'], + packages = ['macroeco', 'macroeco.empirical', 'macroeco.models', + 'macroeco.compare', 'macroeco.main', 'macroeco.misc'], license = 'BSD', ) From 7bf12feb58981312e16c5f4753e7093448aee039 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 19 Apr 2014 11:05:18 -0700 Subject: [PATCH 259/343] In o_ring edge correct counts by default --- macroeco/empirical/_empirical.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py index 399272a..a226188 100644 --- a/macroeco/empirical/_empirical.py +++ b/macroeco/empirical/_empirical.py @@ -765,16 +765,17 @@ def o_ring(patch, cols, splits, spp, bin_edges, density=True): # Convert histogram to density if desired corr_factor = np.ones(len(radii)) # Frac length in plot - if density: - for i, r in enumerate(radii): - circ = geo.Point(*point).buffer(r, resolution=64) - outside_len = circ.boundary.difference(plot_poly).length - corr_factor[i] = ((circ.boundary.length - outside_len) / - circ.boundary.length) - hist = hist / corr_factor # Edge corrected if density, else same + for i, r in enumerate(radii): + circ = geo.Point(*point).buffer(r, resolution=64) + outside_len = circ.boundary.difference(plot_poly).length + corr_factor[i] = ((circ.boundary.length - outside_len) / + circ.boundary.length) + + hist = hist / corr_factor # Edge corrected hist + hist[corr_factor == 0] = 0 # If corr_factor 0, hist should be 0 hists += hist - areas += (torus_areas * corr_factor) # Only used later if density + areas += torus_areas # If density, divide summed torus counts by summed areas if density: From a91a7eee631f0718bdcc1947d3588775a8003e12 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 19 Apr 2014 11:06:16 -0700 Subject: [PATCH 260/343] Use native calculations in expon_uptrunc --- macroeco/models/_distributions.py | 38 +++++++++++++++++-------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/macroeco/models/_distributions.py 
b/macroeco/models/_distributions.py index 6478a5a..012097a 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -419,7 +419,6 @@ def _argcheck(self, mu, k_agg): return (k_agg >= 0) & (p >= 0) & (p <= 1) def _pmf(self, x, mu, k_agg): - p = self._get_p_from_mu(mu, k_agg) return np.exp(self._logpmf(x, mu, k_agg)) def _logpmf(self, x, mu, k_agg): @@ -1022,7 +1021,7 @@ class expon_uptrunc_gen(rv_continuous_meco): .. math:: - f(x) = \frac{\lambda e^{-\lambda x}}{1 - e^{-\lambda x}} + f(x) = \frac{\lambda e^{-\lambda x}}{1 - e^{-\lambda b}} for ``b >= x >= 0``. The ``loc`` and ``scale`` parameters are not used. @@ -1047,7 +1046,7 @@ class expon_uptrunc_gen(rv_continuous_meco): @inherit_docstring_from(rv_continuous_meco) def translate_args(self, mu, b): - raise NotImplementedError, "Translation of mu to lam not implemented" + return _expon_solve_lam_from_mu_vect(mu, b), b @inherit_docstring_from(rv_continuous_meco) def fit_mle(self, data, b=None): @@ -1063,27 +1062,32 @@ def fit_mle(self, data, b=None): expon = expon_gen(a=0.0, b=b) return 1/expon.fit(data, floc=0)[2], b - def _rvs(self, lam, b): - expon = expon_gen(a=0.0, b=b) - return expon.rvs(lam) + def _argcheck(self, lam, b): + return True def _pdf(self, x, lam, b): - expon = expon_gen(a=0.0, b=b) - return expon.pdf(x, lam) + return (lam * np.exp(-lam*x)) / (1 - np.exp(-lam*b)) def _cdf(self, x, lam, b): - expon = expon_gen(a=0.0, b=b) - return expon.cdf(x, lam) + return (1 - np.exp(-lam*x)) / (1 - np.exp(-lam*b)) - def _entropy(self, lam, b): - expon = expon_gen(a=0.0, b=b) - return expon.entropy(lam) +expon_uptrunc = expon_uptrunc_gen(a=0.0, name='expon_uptrunc', shapes='lam, b') - def _stats(self, lam, b): - expon = expon_gen(a=0.0, b=b) - return expon.stats(lam) +def _expon_solve_lam_from_mu(mu, b): + """ + For the expon_uptrunc, given mu and b, return lam. 
+ Similar to geom_uptrunc + """ -expon_uptrunc = expon_uptrunc_gen(a=0.0, name='expon_uptrunc', shapes='lam, b') + def lam_eq(lam, mu, b): + # Small offset added to denominator to avoid 0/0 erors + lam, mu, b = Decimal(lam), Decimal(mu), Decimal(b) + return ( (1 - (lam*b + 1) * np.exp(-lam*b)) / + (lam - lam * np.exp(-lam*b) + Decimal(1e-32)) - mu ) + + return optim.brentq(lam_eq, -100, 100, args=(mu, b), disp=True) + +_expon_solve_lam_from_mu_vect = np.vectorize(_expon_solve_lam_from_mu) class lognorm_gen(rv_continuous_meco): From cbe99d5c41948f51b28b4e30bc7be10ce2107ec8 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 19 Apr 2014 11:08:44 -0700 Subject: [PATCH 261/343] Fix typo on geom pmf test --- macroeco/models/test_distributions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index a40ac5b..8887f52 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -9,13 +9,14 @@ from macroeco.models import * from macroeco.models._distributions import _trunc_logser_solver import matplotlib.pyplot as plt +import scipy as sp import scipy.stats as stats class TestGeom(TestCase): def test_pmf(self): - vals = geom_uptrunc.pmf([0,1,2], 0.25, 2) + vals = geom.pmf([0,1,2], 0.25) assert_array_almost_equal(vals, np.array([0.25, 0.1875, 0.140625])) def test_mean(self): From 31f606b469f7afdb4398e27afd1299a31280e5d3 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 19 Apr 2014 13:58:19 -0700 Subject: [PATCH 262/343] Clean up and test expon and expon_uptrunc --- macroeco/models/_distributions.py | 38 +++++++------ macroeco/models/test_distributions.py | 78 ++++++++++++++++++++++++++- 2 files changed, 97 insertions(+), 19 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 012097a..269bb01 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -380,7 +380,8 @@ class nbinom_gen(rv_discrete_meco): translate_args(mu, k_agg) not used, returns mu and k_agg. fit_mle(data, k_range=(0.1,100,0.1)) - ml estimate of shape parameters mu and k_agg given data, with k_agg evaluated at (min, max, step) values given by k_range. + ml estimate of shape parameters mu and k_agg given data, with k_agg + evaluated at (min, max, step) values given by k_range. %(before_notes)s mu : float distribution mean @@ -478,7 +479,8 @@ class cnbinom_gen(rv_discrete_meco): translate_args(mu, k_agg, b) not used, returns mu, k_agg, and b. fit_mle(data, k_range=(0.1,100,0.1)) - ml estimate of shape parameters mu and k_agg given data, with k_agg evaluated at (min, max, step) values given by k_range. + ml estimate of shape parameters mu and k_agg given data, with k_agg + evaluated at (min, max, step) values given by k_range. %(before_notes)s mu : float distribution mean @@ -994,8 +996,8 @@ def translate_args(self, mu): @inherit_docstring_from(rv_continuous_meco) def fit_mle(self, data): - expon = expon_gen(a=0.0) - return 1 / expon.fit(data, floc=0)[2], + # MLE is method of moments for exponential + return 1 / (np.sum(data) / len(data)) def _rvs(self, lam): return nprand.exponential(1/lam, self._size) @@ -1039,11 +1041,6 @@ class expon_uptrunc_gen(rv_continuous_meco): """ - # Internally, class works by creating a new expon_gen object with the - # appropriate upper limit and calling its methods. - - # TODO: Do all of these broadcast correctly, or should we call _pdf, etc.? 
- @inherit_docstring_from(rv_continuous_meco) def translate_args(self, mu, b): return _expon_solve_lam_from_mu_vect(mu, b), b @@ -1057,10 +1054,17 @@ def fit_mle(self, data, b=None): b : float The upper limit of the distribution """ + # Take mean of data as MLE of distribution mean, then calculate p + mu = np.mean(data) if not b: b = np.sum(data) - expon = expon_gen(a=0.0, b=b) - return 1/expon.fit(data, floc=0)[2], b + lam = _expon_solve_lam_from_mu_vect(mu, b) + + # Just return float, not len 1 array + if len(np.atleast_1d(lam)) == 1: + return float(lam), b + else: + return lam, b def _argcheck(self, lam, b): return True @@ -1121,10 +1125,10 @@ def translate_args(self, mean, sigma): def fit_mle(self, data, fix_mean=False): """%(super)s -Additional Parameters ----------------------- -fix_mean : bool - Default False. If True, fixes mean before optimizing sigma + Additional Parameters + ---------------------- + fix_mean : bool + Default False. If True, fixes mean before optimizing sigma """ @@ -1177,7 +1181,7 @@ def mean_var(vals, pmf): def make_rank(pmf, n, min_supp=1): - ''' + """ Convert any pmf into a rank curve for S species using cumulative distribution function. @@ -1200,7 +1204,7 @@ def make_rank(pmf, n, min_supp=1): Function actually implements (philosophically) a step quantile function. Use if ppf in rv_discrete_meco is too slow - ''' + """ pmf = pmf / np.sum(pmf) # Ensure distribution is normalized diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index 8887f52..8b4981e 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -474,9 +474,83 @@ def test_rank(self): class TestExpon(TestCase): - pass + + def test_pdf(self): + vals = expon.pdf([0,1,2], 2.5) + assert_almost_equal(vals, [2.5, 0.205212497, 0.016844867]) + + def test_mean(self): + mu1 = expon.mean(0.5) + assert_almost_equal(mu1, 2) + + mu2 = expon.mean(0.25) + assert_almost_equal(mu2, 4) + + def test_cdf(self): + vals = expon.cdf([0,1,2], 0.5) + assert_array_almost_equal(vals, [0, 0.39346934, 0.632120559]) + + def test_translate_args(self): + assert_almost_equal(1/13, expon.translate_args(13)) + + def test_fit_mle(self): + assert_almost_equal(1/8, expon.fit_mle([6,7,9,10])) class TestExponUptrunc(TestCase): - pass + + def test_pdf(self): + vals = expon_uptrunc.pdf([0,1,2], 0.2, 10) + assert_almost_equal(vals, [0.231303529, 0.189375312, 0.155047392]) + + def test_pdf_lambda_equal_zero_is_uniform(self): + vals = expon_uptrunc.pdf([0,1,2], 0.0000001, 10) + assert_almost_equal(vals, [0.1, 0.1, 0.1]) + + def test_pdf_integrates_to_one(self): + val1 = sp.integrate.quad(expon_uptrunc.pdf, 0, 10, (0.2, 10)) + assert_almost_equal(val1[0], 1) + + val2 = sp.integrate.quad(expon_uptrunc.pdf, 0, 100, (.000000001, 100)) + assert_almost_equal(val2[0], 1) + + val3 = sp.integrate.quad(expon_uptrunc.pdf, 0, 100, (-5, 100)) + assert_almost_equal(val3[0], 1) + + def test_mean_lambda_equal_zero(self): + # If lam zero (uniform distribution), mean should be 1/2 b + assert_almost_equal(expon_uptrunc.mean(0.0000001, 10), 5, 5) + + def test_mean(self): + def integrand(x, lam, b): + return x * expon_uptrunc.pdf(x, lam, b) + + for lam in [2, 4.5]: + val = sp.integrate.quad(integrand, 0, 5, args=(lam, 10))[0] + assert_almost_equal(expon_uptrunc.mean(lam, 5), val, 4) + + def test_cdf(self): + vals = expon_uptrunc.cdf([0,1,2], 0.2, 10) + assert_array_almost_equal(vals, [0, 0.209641082, 0.381280683]) + + def test_translate_args_uniform_case(self): + lam = 
expon_uptrunc.translate_args(5, 10) + assert_almost_equal(lam[0], 0) + + def test_translate_args(self): + # mean -> lambda -> mean comparison + lam = expon_uptrunc.translate_args(3, 10) + assert_almost_equal(expon_uptrunc.mean(lam, 10), 3) + + def test_fit_mle_uniform_case(self): + data = [5,5,5] + mean = np.mean(data) + lam = expon_uptrunc.fit_mle(data, 10)[0] + assert_almost_equal(expon_uptrunc.mean(lam, 10), 5, 4) + + def test_fit_mle(self): + data = [4,5,7,8] + mean = np.mean(data) + lam = expon_uptrunc.fit_mle(data, 10)[0] + assert_almost_equal(expon_uptrunc.mean(lam, 10), 6) From f2915ea244339107457683d26828b802f616796e Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 19 Apr 2014 14:27:55 -0700 Subject: [PATCH 263/343] Allow main to handle shape strings with spaces --- macroeco/main/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 81a5f08..b28562a 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -258,7 +258,8 @@ def _arg_kwarg_lists(module, analysis): try: obj_meth = analysis.split('.') if obj_meth[1] not in ['fit_mle', 'translate_args']: - arg_names += eval(module+'.'+obj_meth[0]+'.'+"shapes.split(',')") + arg_names += eval(module + '.' + obj_meth[0] + '.' + + "shapes.replace(' ','').split(',')") except: pass From a6aaac189c90a2ee33572018a2de3975596f058a Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 19 Apr 2014 14:28:10 -0700 Subject: [PATCH 264/343] Few cleanups in distributions --- macroeco/models/_distributions.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 269bb01..f358060 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -816,6 +816,8 @@ def _pmf(self, x, mu, sigma, approx_cut=10): pmf[xabove] = pmf_above # If pmf is 0 the likelihood might break + # TODO: This should be fixed in likelihood function as it might apply + # to other distributions pmf[pmf == 0] = 1e-120 return pmf @@ -843,7 +845,7 @@ def _stats(self, mu, sigma, upper=100000): return mean, var, None, None -plnorm = plnorm_gen(name="plnorm", shapes="mu, sigma") +plnorm = plnorm_gen(name='plnorm', shapes='mu,sigma') class plnorm_lowtrunc_gen(rv_discrete_meco): @@ -946,7 +948,7 @@ def _stats(self, mu, sigma, upper=100000): plnorm_lowtrunc = plnorm_lowtrunc_gen(name="plnorm_lowtrunc", - shapes="mu, sigma") + shapes='mu,sigma') def plognorm_intg(x, mu, sigma): # Integral for plognorm From 07a413bf5e46f737f801fa36bf63b41e6f82b99f Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 19 Apr 2014 14:34:16 -0700 Subject: [PATCH 265/343] Commit demo Anza-Borrego data set and parameters file --- demo/ANBO.csv | 122 ++++++++++++++++++++++++++++++++++++++++++++ demo/ANBO.txt | 26 ++++++++++ demo/parameters.txt | 21 ++++++++ 3 files changed, 169 insertions(+) create mode 100644 demo/ANBO.csv create mode 100644 demo/ANBO.txt create mode 100644 demo/parameters.txt diff --git a/demo/ANBO.csv b/demo/ANBO.csv new file mode 100644 index 0000000..1fa6598 --- /dev/null +++ b/demo/ANBO.csv @@ -0,0 +1,122 @@ +year,cell,row,column,spp,count +2010,1,3.0,3.0,cabr,3.0 +2010,1,3.0,3.0,caspi1,20.0 +2010,1,3.0,3.0,crcr,3.0 +2010,1,3.0,3.0,crsp2,1.0 +2010,1,3.0,3.0,gnwe,11.0 +2010,1,3.0,3.0,grass,11.0 +2010,1,3.0,3.0,lesp1,1.0 +2010,1,3.0,3.0,phdi,5.0 +2010,1,3.0,3.0,pypo,6.0 +2010,1,3.0,3.0,ticr,50.0 +2010,2,3.0,2.0,caspi1,17.0 +2010,2,3.0,2.0,comp1,2.0 +2010,2,3.0,2.0,crsp2,7.0 
+2010,2,3.0,2.0,gnwe,4.0 +2010,2,3.0,2.0,grass,26.0 +2010,2,3.0,2.0,phdi,7.0 +2010,2,3.0,2.0,pypo,8.0 +2010,2,3.0,2.0,ticr,12.0 +2010,2,3.0,2.0,unsp1,1.0 +2010,3,3.0,1.0,arsp1,1.0 +2010,3,3.0,1.0,caspi1,9.0 +2010,3,3.0,1.0,crsp2,8.0 +2010,3,3.0,1.0,grass,120.0 +2010,3,3.0,1.0,mobe,4.0 +2010,3,3.0,1.0,phdi,14.0 +2010,3,3.0,1.0,pypo,12.0 +2010,3,3.0,1.0,ticr,7.0 +2010,3,3.0,1.0,unsp1,1.0 +2010,4,3.0,0.0,crcr,23.0 +2010,4,3.0,0.0,crsp2,13.0 +2010,4,3.0,0.0,gnwe,1.0 +2010,4,3.0,0.0,grass,160.0 +2010,4,3.0,0.0,magl,1.0 +2010,4,3.0,0.0,phdi,14.0 +2010,4,3.0,0.0,pypo,6.0 +2010,4,3.0,0.0,ticr,3.0 +2010,5,2.0,3.0,cabr,9.0 +2010,5,2.0,3.0,caspi1,12.0 +2010,5,2.0,3.0,crsp2,1.0 +2010,5,2.0,3.0,gnwe,13.0 +2010,5,2.0,3.0,grass,180.0 +2010,5,2.0,3.0,pypo,5.0 +2010,5,2.0,3.0,ticr,120.0 +2010,6,2.0,2.0,crsp2,15.0 +2010,6,2.0,2.0,grass,115.0 +2010,6,2.0,2.0,phdi,15.0 +2010,6,2.0,2.0,pypo,10.0 +2010,6,2.0,2.0,unsp1,15.0 +2010,7,2.0,1.0,crsp2,9.0 +2010,7,2.0,1.0,grass,12.0 +2010,7,2.0,1.0,phdi,42.0 +2010,8,2.0,0.0,arsp1,1.0 +2010,8,2.0,0.0,crcr,12.0 +2010,8,2.0,0.0,crsp2,6.0 +2010,8,2.0,0.0,grass,110.0 +2010,8,2.0,0.0,phdi,27.0 +2010,8,2.0,0.0,pypo,1.0 +2010,9,1.0,3.0,cabr,7.0 +2010,9,1.0,3.0,enfa,1.0 +2010,9,1.0,3.0,phdi,39.0 +2010,9,1.0,3.0,pypo,7.0 +2010,10,1.0,2.0,cabr,4.0 +2010,10,1.0,2.0,comp1,1.0 +2010,10,1.0,2.0,crcr,3.0 +2010,10,1.0,2.0,crsp2,3.0 +2010,10,1.0,2.0,gnwe,4.0 +2010,10,1.0,2.0,grass,20.0 +2010,10,1.0,2.0,phdi,10.0 +2010,10,1.0,2.0,pypo,6.0 +2010,11,1.0,1.0,comp1,1.0 +2010,11,1.0,1.0,crcr,6.0 +2010,11,1.0,1.0,crsp2,6.0 +2010,11,1.0,1.0,gnwe,3.0 +2010,11,1.0,1.0,grass,86.0 +2010,11,1.0,1.0,mesp,1.0 +2010,11,1.0,1.0,phdi,8.0 +2010,11,1.0,1.0,pypo,2.0 +2010,11,1.0,1.0,ticr,7.0 +2010,11,1.0,1.0,unsh1,1.0 +2010,11,1.0,1.0,unsp3,1.0 +2010,12,1.0,0.0,cabr,5.0 +2010,12,1.0,0.0,cran,1.0 +2010,12,1.0,0.0,crcr,10.0 +2010,12,1.0,0.0,crsp2,7.0 +2010,12,1.0,0.0,gnwe,5.0 +2010,12,1.0,0.0,grass,88.0 +2010,12,1.0,0.0,phdi,14.0 +2010,12,1.0,0.0,pypo,1.0 +2010,12,1.0,0.0,ticr,70.0 +2010,13,0.0,3.0,cabr,1.0 +2010,13,0.0,3.0,cran,1.0 +2010,13,0.0,3.0,crcr,2.0 +2010,13,0.0,3.0,grass,60.0 +2010,13,0.0,3.0,phdi,4.0 +2010,13,0.0,3.0,pypo,4.0 +2010,13,0.0,3.0,ticr,80.0 +2010,13,0.0,3.0,unsp1,1.0 +2010,14,0.0,2.0,comp1,1.0 +2010,14,0.0,2.0,crcr,1.0 +2010,14,0.0,2.0,grass,60.0 +2010,14,0.0,2.0,mesp,2.0 +2010,14,0.0,2.0,ticr,140.0 +2010,15,0.0,1.0,cran,1.0 +2010,15,0.0,1.0,crcr,2.0 +2010,15,0.0,1.0,crsp2,3.0 +2010,15,0.0,1.0,grass,20.0 +2010,15,0.0,1.0,mesp,3.0 +2010,15,0.0,1.0,phdi,3.0 +2010,15,0.0,1.0,pypo,2.0 +2010,15,0.0,1.0,sasp,2.0 +2010,15,0.0,1.0,ticr,100.0 +2010,16,0.0,0.0,cabr,2.0 +2010,16,0.0,0.0,chst,1.0 +2010,16,0.0,0.0,cran,1.0 +2010,16,0.0,0.0,crcr,3.0 +2010,16,0.0,0.0,grass,42.0 +2010,16,0.0,0.0,phdi,8.0 +2010,16,0.0,0.0,plsp1,1.0 +2010,16,0.0,0.0,pypo,3.0 +2010,16,0.0,0.0,ticr,140.0 +2010,16,0.0,0.0,unsp4,1.0 diff --git a/demo/ANBO.txt b/demo/ANBO.txt new file mode 100644 index 0000000..0a39e08 --- /dev/null +++ b/demo/ANBO.txt @@ -0,0 +1,26 @@ +[Description] +name = Anzo Borrego +author = John Harte and Mary Ellen Harte +description = Vegetation census conducted at Anza-Borrego Desert State Park. Site in Indian Valley at N 32' 52.091", W 116' 14.447". Elevation 1195 feet. 
+citation = Unpublished
+
+datapath = ANBO.csv
+cols = spp_col:spp
+
+[year]
+
+[cell]
+
+[row]
+min = 0
+max = 3
+step = 1
+
+[column]
+min = 0
+max = 3
+step = 1
+
+[spp]
+
+[count]
diff --git a/demo/parameters.txt b/demo/parameters.txt
new file mode 100644
index 0000000..8ae3185
--- /dev/null
+++ b/demo/parameters.txt
@@ -0,0 +1,21 @@
+[Plognorm pmf]
+analysis = plnorm.pmf
+
+x = np.arange(10)
+mu = 2
+sigma = 1.
+
+[SAD ANBO Row > 2]
+analysis = sad
+metadata = ANBO.txt
+
+subset = row>=2
+log_y = True
+
+[Comm ANBO]
+analysis = comm_grid
+metadata = ANBO.txt
+
+cols = spp_col:spp; count_col:count; x_col:row; y_col:column
+divs = 4,4;
+models = power_law
From 8f1d4965da95ac5cffaf41e39fa8d220e61c595b Mon Sep 17 00:00:00 2001
From: Justin Kitzes
Date: Sat, 19 Apr 2014 15:47:10 -0700
Subject: [PATCH 266/343] Convert README to rst for consistency with Sphinx docs
---
 README.md => README.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename README.md => README.rst (79%)
diff --git a/README.md b/README.rst
similarity index 79%
rename from README.md
rename to README.rst
index ee02844..da670a7 100644
--- a/README.md
+++ b/README.rst
@@ -3,6 +3,6 @@ Macroeco: Ecological pattern analysis in Python
 Macroeco is a Python package that provides a comprehensive set of functions for analyzing empirical patterns in ecological data, predicting patterns from theory, and comparing empirical results to theory. Many major macroecological patterns can be analyzed using this package, including the species abundance distribution, the species and endemics area relationships, several measures of beta diversity, and many others.
-Extensive documentation for macroeco, including detailed installation instructions, tutorials, and a reference guide, is available at http://macroeco.org. The most recent stable version of the macroeco package can be installed from PyPi (`pip install macroeco`). For users who do not program in Python, a standalone application called Macroeco Desktop, which provides most of the functionality of macroeco through a simple interface that requires no programming, is also available.
+Extensive documentation for macroeco, including detailed installation instructions, tutorials, and a reference guide, is available at http://macroeco.org. The most recent stable version of the macroeco package can be installed from PyPi (``pip install macroeco``). For users who do not program in Python, a standalone application called Macroeco Desktop, which provides most of the functionality of macroeco through a simple interface that requires no programming, is also available.
 The current version of macroeco was developed at the University of California, Berkeley by Justin Kitzes and Mark Wilber and is maintained by Justin Kitzes. Other contributors include Chloe Lewis and Ethan White. The development of macroeco has been supported by the National Science Foundation, the Gordon and Betty Moore Foundation, and the Berkeley Institute for Global Change Biology.
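The parameters file committed above defines three example analyses (a plnorm pmf calculation, a species abundance distribution for a subset of the ANBO plot, and a gridded commonality comparison) that are meant to be run against the ANBO census data. A minimal sketch of how such a file could be executed programmatically is given below; it assumes macroeco is installed, that the demo directory holding parameters.txt, ANBO.txt, and ANBO.csv is the working directory, and it uses the main.main entry point that the mecodesktop launcher (added a few commits later in this series) calls. The call is illustrative only, not part of these commits.

    # Illustrative sketch only, not part of the committed patches.
    # Assumes the working directory is the demo/ folder that contains
    # parameters.txt, ANBO.txt, and ANBO.csv.
    from macroeco import main

    # Run every analysis section defined in parameters.txt
    # ("Plognorm pmf", "SAD ANBO Row > 2", "Comm ANBO").
    main.main('parameters.txt')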
From d05f99a9e91ba01f3f6c70542b4d62296d61e106 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 19 Apr 2014 15:57:48 -0700 Subject: [PATCH 267/343] Update authors in ANBO metadata --- demo/ANBO.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/ANBO.txt b/demo/ANBO.txt index 0a39e08..cc09bbb 100644 --- a/demo/ANBO.txt +++ b/demo/ANBO.txt @@ -1,6 +1,6 @@ [Description] name = Anzo Borrego -author = John Harte and Mary Ellen Harte +author = Mary Ellen Harte and John Harte description = Vegetation census conducted at Anza-Borrego Desert State Park. Site in Indian Valley at N 32' 52.091", W 116' 14.447". Elevation 1195 feet. citation = Unpublished From e1af4f2e214890c35f29681ae73cf6d697a5e947 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 19 Apr 2014 16:59:50 -0700 Subject: [PATCH 268/343] Change name desktop to mecodesktop for consistency with console script --- desktop.py => mecodesktop.py | 0 desktop_mac.spec => mecodesktop_mac.spec | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename desktop.py => mecodesktop.py (100%) rename desktop_mac.spec => mecodesktop_mac.spec (100%) diff --git a/desktop.py b/mecodesktop.py similarity index 100% rename from desktop.py rename to mecodesktop.py diff --git a/desktop_mac.spec b/mecodesktop_mac.spec similarity index 100% rename from desktop_mac.spec rename to mecodesktop_mac.spec From 2ff3e5af820e520facdfe878fa6b8ab4686c9c6e Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 19 Apr 2014 17:23:00 -0700 Subject: [PATCH 269/343] Complete setup.py --- macroeco/__init__.py | 17 ++++++++++------- setup.py | 42 ++++++++++++++++++++++++++++++++---------- 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/macroeco/__init__.py b/macroeco/__init__.py index 882bdb9..f374414 100644 --- a/macroeco/__init__.py +++ b/macroeco/__init__.py @@ -40,16 +40,19 @@ """ -__author__ = "Justin Kitzes and Mark Wilber" -__copyright__ = "Copyright 2012-2014, Regents of University of California" -__license__ = "BSD 2-clause" -__version__ = "0.3" -__maintainer__ = "Justin Kitzes" -__email__ = "jkitzes@berkeley.edu" -__status__ = "Development" +import sys as _sys + +__version__ = '0.3' import empirical import models import compare import main import misc + +def mecodesktop(): + if len(_sys.argv) > 1: + param_path = _sys.argv[1] + main.main(param_path) + else: + print "Macroeco Desktop must be called with path to parameters file" \ No newline at end of file diff --git a/setup.py b/setup.py index 31490a4..a79a8df 100644 --- a/setup.py +++ b/setup.py @@ -1,15 +1,37 @@ -try: - from setuptools import setup -except ImportError: - from distutils.core import setup +from setuptools import setup, find_packages +from macroeco import __version__ setup( name = 'macroeco', - version= '0.3', - description = 'Analysis of ecological patterns in Python', - author = 'Justin Kitzes, Mark Wilber, Chloe Lewis', - url = 'https://github.com/jkitzes/macroeco', - packages = ['macroeco', 'macroeco.empirical', 'macroeco.models', - 'macroeco.compare', 'macroeco.main', 'macroeco.misc'], + version = __version__, + packages = find_packages(), + entry_points = {'console_scripts': ['mecodesktop=macroeco:mecodesktop',],}, + package_data = {'': ['*.txt', '*.csv']}, + + author = 'Justin Kitzes and Mark Wilber', + author_email = 'jkitzes@berkeley.edu', + description = 'Ecological pattern analysis in Python', + long_description = open('README.rst').read(), license = 'BSD', + keywords = ('ecology biology environment conservation biodiversity ' + 
'informatics data science'), + url = 'http://github.com/jkitzes/macroeco', + + classifiers = [ + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Development Status :: 4 - Beta", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License",], + + install_requires = [ + 'numpy>=1.6', + 'scipy>=0.12', + 'pandas>=0.13', + 'matplotlib', + # 'shapely', # Do not force install if user doesn't have + 'configparser', + 'decorator', + 'twiggy'], ) + +# python setup.py sdist bdist_egg upload -r https://testpypi.python.org/pypi \ No newline at end of file From 0aa5b2aad860165c67d5a96c5eba55750db12769 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 19 Apr 2014 17:23:14 -0700 Subject: [PATCH 270/343] Cleanups to Sphinx conf file --- doc/conf.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 76fd7e0..f9b5a06 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -10,15 +10,17 @@ # # All configuration values have a default; values that are commented out # serve to show the default. - -import sys, os - +# # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath('..')) #sys.path.insert(0, os.path.abspath('_ext/numpydoc')) +import sys +import os +from macroeco import __version__ + # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. @@ -49,7 +51,7 @@ # General information about the project. project = u'macroeco' -copyright = u'2013-2014, Justin Kitzes and Mark Wilber' +# copyright = u'2013-2014, Justin Kitzes and Mark Wilber' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -94,6 +96,7 @@ # A list of ignored prefixes for module index sorting. #modindex_common_prefix = [] +# Mock objects that RTC can't build/import class Mock(object): __all__ = [] @@ -122,10 +125,8 @@ def __getattr__(cls, name): # -- Options for HTML output --------------------------------------------------- -# on_rtd is whether we are on readthedocs.org -import os +# Use local RTD theme if building locally on_rtd = os.environ.get('READTHEDOCS', None) == 'True' - if not on_rtd: # only import and set the theme if we're building docs locally import sphinx_rtd_theme html_theme = 'sphinx_rtd_theme' @@ -229,7 +230,7 @@ def __getattr__(cls, name): # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'macroeco.tex', u'macroeco Documentation', + ('index', 'macroeco.tex', u'Macroeco', u'Justin Kitzes and Mark Wilber', 'manual'), ] @@ -259,7 +260,7 @@ def __getattr__(cls, name): # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
man_pages = [ - ('index', 'macroeco', u'macroeco Documentation', + ('index', 'macroeco', u'Macroeco', [u'Justin Kitzes and Mark Wilber'], 1) ] @@ -273,9 +274,9 @@ def __getattr__(cls, name): # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'macroeco', u'macroeco Documentation', + ('index', 'macroeco', u'Macroeco', u'Justin Kitzes and Mark Wilber', 'macroeco', - 'One line description of project.', + 'Ecological pattern analysis in Python', 'Miscellaneous'), ] From 1fe39ea2ae7503c559b7d4b4a93752974b5cb973 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 19 Apr 2014 17:27:37 -0700 Subject: [PATCH 271/343] Revert multiline docstring format causing RTD failure --- macroeco/models/_distributions.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index f358060..0126f5f 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -380,8 +380,7 @@ class nbinom_gen(rv_discrete_meco): translate_args(mu, k_agg) not used, returns mu and k_agg. fit_mle(data, k_range=(0.1,100,0.1)) - ml estimate of shape parameters mu and k_agg given data, with k_agg - evaluated at (min, max, step) values given by k_range. + ml estimate of shape parameters mu and k_agg given data %(before_notes)s mu : float distribution mean @@ -400,7 +399,7 @@ def fit_mle(self, data, k_range=(0.1, 100, 0.1)): In addition to data, gives an optional keyword argument k_range contains a tuple of the start, stop, and step values to search for - k_agg. default is ``k_range=(0.1,100,0.1)``. a brute force search is + k_agg. default is ``k_range=(0.1,100,0.1)``. A brute force search is then used to find the parameter k_agg. """ @@ -479,8 +478,7 @@ class cnbinom_gen(rv_discrete_meco): translate_args(mu, k_agg, b) not used, returns mu, k_agg, and b. fit_mle(data, k_range=(0.1,100,0.1)) - ml estimate of shape parameters mu and k_agg given data, with k_agg - evaluated at (min, max, step) values given by k_range. + ml estimate of shape parameters mu and k_agg given data %(before_notes)s mu : float distribution mean From 62a4c96c652e63bddfb1a20eff6e94d8f6e00ca5 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 19 Apr 2014 17:31:33 -0700 Subject: [PATCH 272/343] Fix order of imports in conf --- doc/conf.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index f9b5a06..2ab0417 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -14,13 +14,14 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.insert(0, os.path.abspath('..')) -#sys.path.insert(0, os.path.abspath('_ext/numpydoc')) import sys import os from macroeco import __version__ +sys.path.insert(0, os.path.abspath('..')) +#sys.path.insert(0, os.path.abspath('_ext/numpydoc')) + # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. 
From 2b1d4ad8e6d8f086cfe791ecc86a417131fb4ea0 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sat, 19 Apr 2014 18:27:10 -0700 Subject: [PATCH 273/343] Changed plnorm_lowtrunc to plnorm_ztrunc --- macroeco/models/__init__.py | 4 ++-- macroeco/models/_distributions.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py index 98f814d..819bad1 100644 --- a/macroeco/models/__init__.py +++ b/macroeco/models/__init__.py @@ -34,12 +34,12 @@ cnbinom logser_uptrunc plnorm - plnorm_lowtrunc + plnorm_ztrunc """ from _distributions import (geom, geom_uptrunc, nbinom, cnbinom, - logser_uptrunc, plnorm, plnorm_lowtrunc, + logser_uptrunc, plnorm, plnorm_ztrunc, expon, expon_uptrunc, lognorm) from ._curves import (power_law, diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 0126f5f..2403fb6 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -846,7 +846,7 @@ def _stats(self, mu, sigma, upper=100000): plnorm = plnorm_gen(name='plnorm', shapes='mu,sigma') -class plnorm_lowtrunc_gen(rv_discrete_meco): +class plnorm_ztrunc_gen(rv_discrete_meco): r""" Zero-truncated poisson lognormal random variable. @@ -945,8 +945,9 @@ def _stats(self, mu, sigma, upper=100000): return mean, var, None, None -plnorm_lowtrunc = plnorm_lowtrunc_gen(name="plnorm_lowtrunc", - shapes='mu,sigma') +plnorm_ztrunc = plnorm_ztrunc_gen(name="plnorm_ztrunc", + shapes='mu, sigma') + def plognorm_intg(x, mu, sigma): # Integral for plognorm From c3287dd7f0d6b4f4b1c5471143c03c4fe3ca0c06 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sat, 19 Apr 2014 18:33:14 -0700 Subject: [PATCH 274/343] Renamed plnorm_lowtrunc in unit tests --- macroeco/models/test_distributions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index 8b4981e..288c061 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -436,14 +436,14 @@ def test_rank(self): pass -class TestPlnormLowTrunc(TestCase): +class TestPlnormZtrunc(TestCase): def test_pmf(self): # Test against macroeco_distributions: # pln.pmf([0, 50, 1000], 2.34, 5, 1) md_res = np.array([0, 2.12916164e-03, 7.36783061e-05]) - test = plnorm_lowtrunc.pmf([0, 50, 1000], 2.34, 5) + test = plnorm_ztrunc.pmf([0, 50, 1000], 2.34, 5) assert_array_almost_equal(md_res, test) @@ -453,7 +453,7 @@ def test_cdf(self): # ppolono(c(1,2,3), 4.3, 100) / (1 - ppolono(0, 4.3, 100)) r_res = [0.007670365, 0.011507417, 0.014065948] - test = plnorm_lowtrunc.cdf(np.arange(1, 4), 4.3, 100) + test = plnorm_ztrunc.cdf(np.arange(1, 4), 4.3, 100) assert_array_almost_equal(r_res, test) def test_fit_mle(self): @@ -462,7 +462,7 @@ def test_fit_mle(self): # macroeco_distributions fit: pln_solver(data) md_fits = (1.068510556981163, 1.8800439687956865) - test = plnorm_lowtrunc.fit_mle(data) + test = plnorm_ztrunc.fit_mle(data) assert_array_almost_equal(test, md_fits, decimal=4) # R poilog: poilogMLE(data) From 91d7dd51a0e3ba3fbff678b7a946173b54884b48 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sat, 19 Apr 2014 18:37:29 -0700 Subject: [PATCH 275/343] Moved and renamed mle and tpdf in distributions. 
Issue #73 --- macroeco/models/_distributions.py | 57 +++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 2403fb6..d106c51 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -925,13 +925,17 @@ def _pmf(self, x, mu, sigma): def _cdf(self, x, mu, sigma): + # Format input x = np.array(x) mu = np.atleast_1d(mu) sigma = np.atleast_1d(sigma) + # Calculate cdf from plnorm_gen norm = 1 - plognorm_intg_vec(0, mu[0], sigma[0]) cdf_vals = (plnorm.cdf(x, mu, sigma) - plnorm.cdf(0, mu[0], sigma[0])) / norm + + # Values less than one have zero probability cdf_vals[x < 1] = 0 return cdf_vals @@ -1139,10 +1143,38 @@ def fit_mle(self, data, fix_mean=False): else: mean = np.mean(data) + + # MLE fxn to be optmimized + mle = lambda sigma, x, mean: -1 *\ + np.sum(self._pdf_w_mean(x, mean, sigma)) + sigma = optim.fmin(mle, np.array([np.std(np.log(data), ddof=1)]), args=(data, mean), disp=0)[0] + return self.translate_args(mean, sigma) + def _pdf_w_mean(self, x, mean, sigma): + """ + Calculates the pdf of a lognormal distribution with parameters mean + and sigma + + Parameters + ---------- + mean : float or ndarray + Mean of the lognormal distribution + sigma : float or ndarray + Sigma parameter of the lognormal distribution + + Returns + ------- + : float or ndarray + pdf of x + """ + + # Lognorm pmf with mean for optimization + mu, sigma = self.translate_args(mean, sigma) + return self.logpdf(x, mu, sigma) + def _argcheck(self, mu, sigma): return True @@ -1161,20 +1193,23 @@ def _stats(self, mu, sigma): lognorm = lognorm_gen(name="lognorm", shapes="mu, sigma") -def tpdf(x, mean, sigma): - # Lognorm pmf with mean for optimization - mu, sigma = lognorm.translate_args(mean, sigma) - return lognorm.logpdf(x, mu, sigma) - - -def mle(sigma, x, mean): - # MLE function for lognormal - return -1 * np.sum(tpdf(x, mean, sigma)) +def mean_var(vals, pmf): + """ + Calculates the mean and variance from vals and pmf + Parameters + ---------- + vals : ndarray + Value range for a distribution + pmf : ndarray + pmf values corresponding with vals + Returns + ------- + : tuple + (mean, variance) -def mean_var(vals, pmf): - # Calculates the mean and variance from vals and pmf + """ mean = np.sum(vals * pmf) var = np.sum(vals ** 2 * pmf) - mean ** 2 From 897abba71e81e8ffae1c61afd33707cd14a6f130 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sat, 19 Apr 2014 21:27:35 -0700 Subject: [PATCH 276/343] Plnorm rank distribution uses ppf and brute force. #Issue 73 --- macroeco/models/_distributions.py | 139 +++++++++++++++----------- macroeco/models/test_distributions.py | 23 ++++- 2 files changed, 101 insertions(+), 61 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index d106c51..821a387 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -111,6 +111,20 @@ """ # TODO: Finish doc_rank above +_doc_make_rank = \ +""" +obj : discrete distribution object + Scipy discrete distribution object +crit : float + A value between 0 - 1. Below this value ppf is used, above a this + value a solver is used. +upper : int + Upper bound to the solver. Rank will not return values above + upper +xtol : float + Precision of the brentq solver. 
+""" + class rv_continuous_meco(rv_continuous): """ @@ -682,7 +696,7 @@ def _stats(self, p, b): vals = np.arange(1, b + 1) full_pmf = self.pmf(vals, p, b) - mean, var = mean_var(vals, full_pmf) + mean, var = _mean_var(vals, full_pmf) return mean, var, None, None @@ -776,15 +790,18 @@ def mle(params): return mu, sigma @inherit_docstring_from(rv_discrete_meco) - def rank(self, n, mu, sigma, **kwds): + @doc_sub(_doc_make_rank) + def rank(self, n, mu, sigma, crit=.5, upper=10000, xtol=1): """%(super)s - Uses approximation of rank distribution. The keyword ``upper`` defines - the upper bound used in the approximation. Default is 100000. +Additional Parameters +---------------------- + {0} + """ - upper = kwds.get('upper', 100000) - return make_rank(self.pmf(np.arange(upper + 1), mu, sigma), n, - min_supp=0) + + return _make_rank(self, n, mu, sigma, crit=crit, upper=upper, + xtol=xtol) def _argcheck(self, mu, sigma): return True @@ -839,7 +856,7 @@ def _stats(self, mu, sigma, upper=100000): vals = np.arange(0, upper + 1) full_pmf = self.pmf(vals, mu, sigma) - mean, var = mean_var(vals, full_pmf) + mean, var = _mean_var(vals, full_pmf) return mean, var, None, None @@ -899,15 +916,18 @@ def mle(params): return mu, sigma @inherit_docstring_from(rv_discrete_meco) - def rank(self, n, mu, sigma, upper=100000): + @doc_sub(_doc_make_rank) + def rank(self, n, mu, sigma, crit=0, upper=10000, xtol=1): """%(super)s - Uses approximation of rank distribution. Increasing ``upper`` will - give a closer approximation. - """ - return make_rank(self.pmf(np.arange(upper + 1), mu, sigma), n, - min_supp=1) +Additional Parameters +---------------------- + {0} + """ + + return _make_rank(self, n, mu, sigma, crit=crit, upper=upper, + xtol=xtol) def _argcheck(self, mu, sigma): return True @@ -936,6 +956,7 @@ def _cdf(self, x, mu, sigma): plnorm.cdf(0, mu[0], sigma[0])) / norm # Values less than one have zero probability + cdf_vals = np.atleast_1d(cdf_vals) cdf_vals[x < 1] = 0 return cdf_vals @@ -944,7 +965,7 @@ def _stats(self, mu, sigma, upper=100000): vals = np.arange(1, upper + 1) full_pmf = self.pmf(vals, mu, sigma) - mean, var = mean_var(vals, full_pmf) + mean, var = _mean_var(vals, full_pmf) return mean, var, None, None @@ -968,6 +989,7 @@ def plognorm_intg(x, mu, sigma): plognorm_intg_vec = np.vectorize(plognorm_intg) + # # Continuous # @@ -1193,69 +1215,68 @@ def _stats(self, mu, sigma): lognorm = lognorm_gen(name="lognorm", shapes="mu, sigma") -def mean_var(vals, pmf): +@doc_sub(_doc_make_rank) +def _make_rank(dist_obj, n, mu, sigma, crit=0.5, upper=10000, xtol=1): """ - Calculates the mean and variance from vals and pmf + Make rank distribution using both ppf and brute force. 
+ + Setting crit = 1 is equivalent to just using the ppf Parameters ---------- - vals : ndarray - Value range for a distribution - pmf : ndarray - pmf values corresponding with vals - - Returns - ------- - : tuple - (mean, variance) + {0} """ + qs = (np.arange(1, n + 1) - 0.5) / n + rank = np.empty(len(qs)) - mean = np.sum(vals * pmf) - var = np.sum(vals ** 2 * pmf) - mean ** 2 - return mean, var + brute_ppf = lambda val, prob: prob - dist_obj.cdf(val, mu, sigma) + + qs_less = qs <= crit + ind = np.sum(qs_less) + # Use ppf if qs are below crit + rank[qs_less] = dist_obj.ppf(qs[qs_less], mu, sigma) -def make_rank(pmf, n, min_supp=1): + # Use brute force if they are above + for i, tq in enumerate(qs[~qs_less]): + + j = ind + i + try: + # TODO: Use an adaptable lower bound to increase speed + rank[j] = np.abs(np.ceil(optim.brentq(brute_ppf, -1, upper, + args=(tq,), xtol=xtol))) + + except ValueError: + + # If it is above the upper bound set all remaining values + # to the previous value + rank[j:] = np.repeat(rank[j - 1], len(rank[j:])) + break + + return rank + + +def _mean_var(vals, pmf): """ - Convert any pmf into a rank curve for S species using cumulative - distribution function. + Calculates the mean and variance from vals and pmf Parameters ---------- + vals : ndarray + Value range for a distribution pmf : ndarray - Probability of observing a species from 1 to length pmf individs. - n : int - Total number of samples - min_supp : int - The minimum support of the distribution. Often either 1 or 0. + pmf values corresponding with vals Returns ------- - ndarray - 1D array of predicted ranks - - Notes - ----- - Function actually implements (philosophically) a step quantile function. - Use if ppf in rv_discrete_meco is too slow + : tuple + (mean, variance) """ - pmf = pmf / np.sum(pmf) # Ensure distribution is normalized - - points = np.arange(1 / (2 * n), 1, 1 / n) - counts = np.zeros(n) - - if min_supp == 1: - pmf = np.array([0] + list(pmf)) # Add 0 to start of pmf - cum_pmf = np.cumsum(pmf) - - for cutoff in cum_pmf: - greater_thans = (points >= cutoff) - counts[greater_thans] += 1 + mean = np.sum(vals * pmf) + var = np.sum(vals ** 2 * pmf) - mean ** 2 + return mean, var - if not greater_thans.any(): # If no greater thans, done with samples - break - return counts diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index 288c061..39306eb 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -431,9 +431,26 @@ def test_fit_mle(self): md_res = (1.3195580310886075, 1.1876019842774048) assert_array_almost_equal(md_res, fits, decimal=4) - def test_rank(self): - pass + # This should be a slow test! + + # Test against ppf. 
+ # >>> n = 50 + # >>> vals = (np.arange(1, n+1) - 0.5) / n + # >>> plnorm.ppf(vals, 1, 1) + test_case = np.array([ 0., 0., 0., 0., 0., 0., 0., 0., + 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., + 2., 2., 2., 2., 3., 3., 3., 3., 3., 3., 4., 4., + 4., 4., 5., 5., 5., 6., 6., 6., 7., 7., 8., 9., + 10., 11., 13., 15., 19., 29.]) + + pred_res = plnorm.rank(50, 1, 1, crit=0.5, upper=40) + + # Test the values are within one + diff = np.abs(pred_res - test_case) + zeros = np.sum(diff == 0) + ones = np.sum(diff == 1) + assert_equal(zeros + ones, len(diff)) class TestPlnormZtrunc(TestCase): @@ -470,6 +487,8 @@ def test_fit_mle(self): assert_array_almost_equal(test, r_fits, decimal=3) def test_rank(self): + + # TODO: Can't test this against ppf because ppf is too slow pass From 34b2ff073dcff77b55aa6ae2a09f96a1fcc3f08f Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sat, 19 Apr 2014 21:44:05 -0700 Subject: [PATCH 277/343] Temporary fix for _parse_arg issue with kwargs. Closes #62 --- macroeco/models/_distributions.py | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 821a387..6285c41 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -806,8 +806,10 @@ def rank(self, n, mu, sigma, crit=.5, upper=10000, xtol=1): def _argcheck(self, mu, sigma): return True - def _pmf(self, x, mu, sigma, approx_cut=10): + def _pmf(self, x, mu, sigma): + # TODO: Add approx_cut as keyword. Strange parse_args error + approx_cut = 10 x = np.array(x) pmf = np.empty(len(x), dtype=np.float) xbelow = x <= approx_cut @@ -837,7 +839,7 @@ def _pmf(self, x, mu, sigma, approx_cut=10): return pmf - def _cdf(self, x, mu, sigma, approx_cut=10): + def _cdf(self, x, mu, sigma): mu = np.atleast_1d(mu) sigma = np.atleast_1d(sigma) @@ -851,14 +853,6 @@ def _cdf(self, x, mu, sigma, approx_cut=10): return cdf - def _stats(self, mu, sigma, upper=100000): - - vals = np.arange(0, upper + 1) - full_pmf = self.pmf(vals, mu, sigma) - - mean, var = _mean_var(vals, full_pmf) - - return mean, var, None, None plnorm = plnorm_gen(name='plnorm', shapes='mu,sigma') @@ -961,15 +955,6 @@ def _cdf(self, x, mu, sigma): return cdf_vals - def _stats(self, mu, sigma, upper=100000): - - vals = np.arange(1, upper + 1) - full_pmf = self.pmf(vals, mu, sigma) - mean, var = _mean_var(vals, full_pmf) - - return mean, var, None, None - - plnorm_ztrunc = plnorm_ztrunc_gen(name="plnorm_ztrunc", shapes='mu, sigma') From 15884daf4d53871b1c0f4b23c37675879c48b7e4 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sun, 20 Apr 2014 22:25:43 -0700 Subject: [PATCH 278/343] Unittested format_data --- macroeco/misc/format_data.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/macroeco/misc/format_data.py b/macroeco/misc/format_data.py index 0a58f45..e8f73ff 100644 --- a/macroeco/misc/format_data.py +++ b/macroeco/misc/format_data.py @@ -61,7 +61,6 @@ def data_read_write(data_path_in, data_path_out, format_type, **kwargs): form_data.to_csv(data_path_out, index=False) - def format_dense(base_data, non_label_cols, **kwargs): """ Formats dense data type to stacked data type. @@ -74,8 +73,8 @@ def format_dense(base_data, non_label_cols, **kwargs): The dense data non_label_cols : list A list of columns in the data that are not label columns - item_col : str - Name of the item column in the formatted data. Default, "label" + label_col : str + Name of the label column in the formatted data. 
Default, "label" count_col : str Name of the count column in the formatted data. Default, "count" nan_to_zero : bool @@ -136,10 +135,12 @@ def format_dense(base_data, non_label_cols, **kwargs): if kwargs['nan_to_zero']: ind = np.isnan(columnar_data[kwargs['count_col']]) columnar_data[kwargs['count_col']][ind] = 0 + columnar_data.reset_index(inplace=True, drop=True) # Drop nans? if kwargs['drop_na']: columnar_data = columnar_data.dropna(how="any") + columnar_data.reset_index(inplace=True, drop=True) return columnar_data From e9f7aa170c1078b8055af0b1a5ecdf68ef21a625 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Mon, 21 Apr 2014 22:35:44 -0700 Subject: [PATCH 279/343] Fixed rank function for logser_uptrunc --- macroeco/models/_distributions.py | 7 +++++++ macroeco/models/test_distributions.py | 10 ++++++++++ 2 files changed, 17 insertions(+) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 6285c41..e893c69 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -654,7 +654,10 @@ def fit_mle(self, data, b=None): return _trunc_logser_solver(length, b), b def _pmf(self, x, p, b): + x = np.array(x) + p = np.atleast_1d(p) + b = np.atleast_1d(b) if p[0] > 0: pmf = stats.logser.pmf(x, p) / stats.logser.cdf(b, p) @@ -666,7 +669,11 @@ def _pmf(self, x, p, b): return pmf def _cdf(self, x, p, b): + x = np.array(x) + p = np.atleast_1d(p) + b = np.atleast_1d(b) + if p[0] < 1: return stats.logser.cdf(x, p) / stats.logser.cdf(b, p) else: diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index 39306eb..332f9e1 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -281,6 +281,16 @@ def test_n_close_to_s(self): _trunc_logser_solver(3, 4) _trunc_logser_solver(100, 101) + def test_rank(self): + # Test rank against values generated by hand + exp_vals = np.array([1., 1., 2., 3., 4., 7., 11., 18., 31., 62.]) + + # Test values generated + test_vals = logser_uptrunc.rank(10, .99, 100) + + assert_array_equal(exp_vals, test_vals) + + class TestLognorm(TestCase): def test_pmf(self): From 4644a7e0d3bd8d6b596eb14cc0caf6e3b3dcdc19 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 22 Apr 2014 14:06:01 -0700 Subject: [PATCH 280/343] Update mecodesktop name and add icon --- icon.icns | Bin 0 -> 121894 bytes mecodesktop_mac.spec | 10 +++++----- 2 files changed, 5 insertions(+), 5 deletions(-) create mode 100644 icon.icns diff --git a/icon.icns b/icon.icns new file mode 100644 index 0000000000000000000000000000000000000000..575f8232da4aa426cccf928ed4eb76933521c730 GIT binary patch literal 121894 zcmeFac_5Wr_c*@KK~#ollm<$sb4+PK11g#$qEaf7(nN@4I>)C7GwWZi0Tp87Yf-0as56MmxsA8!L1&5G51ySuOEDsT) zQYhpyC^Jw&m_$KjM+y@{WD1oGa%2#oI#W<73E`;}OcNO@0umRXbOC6X9;gP{j)IDu z5Cer=PIJVnAqdqV5}+DFB194ai9kh!i4FuL1{Dz(aUy_TqBDU(1YWiT9Fb^Gz>7n_ z5Rk9~0n7^$2qKOIyeKq~jzUBdas?s02m#YVv_&{6V1OEcB*+Y^5s`t85G4pZA!F%N zCt$o-(CBCGXmGdXcX+y_sy&6KPKOtsO@2p%=i9eg0P|cOpabQNv@!r?KLXUAWFOr@ zqnRVLxRpkGqrWq`53v0_2&K?yiEB$S0U9j@p}7OJz#UzKU_Z2G2d#8N?;uFK4B8B7 zs~-)4ALan8{akDa{Hh2b;xIll2>0y5KzM@a@H6Q_2>J)$=jM}#L4<^MEP?SIdh}0X z^OoWAC14CiI15q;!du#qf)Rr?$woZ@;Y?uydBc=rfN2Jk3Y*;wW>9v8Dd!CH5Q-jv zbp&}&Y(CmJVzb{3n|-#x0s}yDv9YC3TYD%w!Sp9tpRsnd0qBeawiqmtl`}|URG7!- zNx+ZHVLDi0D+A`bwL9?IgATeSI;^o3L$V^dV-_(TB%qmDdf8k+woqda zK%d8oIAo30&jEo&8*6%99kDV=osM^e#f3vyQ(%=~XapLe>d->cDQGN?WCfGf5-X5A z5LRP6FnNMKM|#N{;m(lkv1(cRp-EV;kgU{`5p*jJy0wf9-C9B^gKmY@6c6262y4$9 z;fu*sCyE;^7%JJF;*3zSHw9HgUzWRL3km*%GSv;Ve1YWTJB6?BvXpW 
zgHVwpg^@}{Ww3-{%^?R01DTRTcBP;aI{@&dv_UPftnuWW3Qt(~sKTCtqftHR{8+Kf z3)BWsptC4AaxQ7Zz*dEyt za8g7s@Fke{@uu4YmxPF(P=uZV{qk55I)s2z$FTg4hynZ)f+DyY24MmLVfvwW2x9!0 zQJ6|ZHv$rXJ`#lr@p?i9y`ioSpi9_q$XCJiIZ5|YjAOmj(Axd}f^A}=XQBK=RK!;e zPWIQ&_NFd|UsBmrZ_#KE#v!v4x9K~@mtkIy1}z);aBTOw_nz*ghJI`vL1jOf**}s7 zuJtSHcltEj&m)O6@{?~gTHbf^k(QCvuUFmQVlZr{^k>@j5!CjiLt6?4VWA;%`FC=a ztHR{X8ABj-kUV*ao7U+fjKsY@d}lTsq&p8H`hvMxbjrRHX|ylX>2%RSr(Pw2J3`+& zzH?k16pyAOVp(S0{!XD6zJYg!XI2je1>mLLj3#4S2=*Mr&^PfiZDYto;wS>}&p5EX zX!H|4D2_@1y=BtF(L@@pZT1iy>YRe~$C7EshRCRiMpI-QTQ^$#Fd5a;8WD^Zk>42lhb7<1Xv2Ue8dJN^gFQ3Evu7UjsCh$cSouSmURpd#{k<7u$VhGky~qCN zCAa50{aYIUSYd!Crw%f_bsSOeQGCm+qWz9b{(3QuVjczPAH#NOrdM?cXKosgaHmM| zSU_hJN8j&aVLyT>(K-fpL)f9Mt{_HJD{E}GhRrG%F~gP$`;iUF*~TA>Gf7^svH2id zk{`%;!oFpF5fJo<(;eZ=NEc!6f~^}idK}5x9}4#HpDVT>`(XPUY~9Gx1>1$aVfVI% ze;jR`5l$WU&PiazN{?g>+qtz9RLui4L32r1umIbtE9~JIXzgJGRBKloCM>3<@5R>O z)nVUfBGD0EQ1LU4$Qc4-5`y{+h8GYz0KYZK7i$*AkL|pO6o5q>STM{0012xvMsu*i ztE2M)06C!x^f1CA0OYKkAd+B&pn?Q)7!7M3rX>)F=Y~Kq5(q7D1|k+*SRsob8y7%g zQ3pa8C&UGVv_U=Au7lK5$QsiM`xT50R0lzIfY}W(18idrSU(K*fLH^69?=0yPsC&f z(FWE`AbBGOQ#cI3$XNIzXt-Gb^2YE%z~Bi5E3Dvw4TD}V)@oNMTAM(Wao!0GLR=Gp z@K!cZZ9EJ_FM|0}QV3I}jzWt=cp8NghZu?}6e`S^Qi$ZK(HIhoGs>_Q8mW zQc9*$$u1CZ_#+Y-QGF;r2wy~{`h$Qcy%Yr7KTH)0H56hOR1E~!*r5+fF9VAUx(G2w z5yUmnf$=_AAX1E-5XjKGIUbZrR5Ar30tiO{Cc6O)U^xZC9}IS(Fi|0h#$pjfg>fu) z0bK|*q9TAGj=%1lE+JQ}9ud&<&?x%o3JHQY;3r&1I06C2mY+xv zfw2{}CB#4ggG8?&oFJTnkQ4--!VpWj6Z{cQjOYu|iVq4Vz~Yx721C38gDFJ}CBTX& z5HCVhB?6HY(G~f@Pbc`#1^GkJ0(!6zOx>4&m*R)B8PE_;Uc{5YAb^lCgfjrU6PSon z5P^ZEK@d)JB3MWy0wW>jN`S5v0P8XU!5NQ%2oaqh7SV}1Vl15e}DlB9a0wXfFhM{3r;z0SRLP9%h#p0I4Cs12Ks}WQv5LH^e*< zqZE+<6A;Y7sudK0S&Bt(uxz2fVGs#H2x3JM7(!kz`UoIi2v~_?uvmp%fN%w>0Cf~0 zh)DIKWAs38G7I{D`b=m0BlXO;p$~rl!Cxx>eP)E%Z_vX2fM2eTMmzDl{5XX~Y%`Ym z9ey^#-jI6w*4;m4B7Sb!J!dbbR*VQS$-iP3)bYz5!8aHe&}i=C@pI`VG!8S2d2j!T zfAa2>uVa|#AU*Wy{Si(L3UZwP5v7ilDtio3GmUm{wE9T~pGGnbm;2%zUJRB^M)5Bn zPD>0k(%)_F3TeX#;gS4CqoXrA&*vC5E}QOQ_R10L3?akV;dg(5^(rRzvk-?4Kf_@? 
zh%v?uBM09d2?rp&BYKVscNhQA?=_PB+r9lT?$h+iBax#HHG4-;2ChLC3co(;%Lp93 zsQq32yb)$gRv)?r!;Tm)4g+J}*S@n)8J@U_EQl+64BId`0G@T|84inYF`(I%!}PK5gkf%k zLI-J8?1-mkEZZ>L4-FfBp3{*{i_nbPVe(J!fMI6n>_Orl#8~hX+c4rI=MVm6bmT2a z%nnn3e*YL+gj6wdKLO1j%Gv$QP0JhUH4kD8(0re97za@;3x3oFAsq~r)Nk$DS*P#p~ycsto{vG}>%L9Hx(X zHwcbpM;z3r2Aqz4WH#liKTG-iXFjTGxq5{H*#3V zNOq3prWHeI@XLo~EPKOore87xf#vZifoBF&C*lSgciMM>mhjCG?EK@iGPtaxt31wV z<%AXaM>YQ`gT4uN(B)*t^w8-4bi#qB4?Bs{fAk{ky5lQ7J5A~V5}bSWzQEho27bcn zw;Re>mKqa-s}k!DET@g#nY3eRX$PCn*s00MrWN5V;XI5;&GhpUWSp%(A_f!C?FPvA zaJU{!5x+)ECUBItfkWq0IK^35!AaVN1gAq%1!AOw*h%>^9J)*4mIs8e!`d|)q+W;^ zEZ{mP8K}94!4eeT0vlO~0glI{JAloA%(4}UbPsrMLNoGh(x4_JQUP!!W0iXVN9IJV zvL(Qr034W*hfO@RD;qO{CEa5&`W@^DNIx8sV(9h04X18o;bQ}bJ9QH2HXN#LAcYO5 zbwrAS^M4k0G9^KS5GfMgw`~~VkZn#sWk*6HAp@it!H6}M9w1$TQU+!Y;`qTMb2ygh zP6HhzYYSW84+1X0S&&lbI1)WS9}Gyiy#O4RlgC2>A_OoH?vWC)%AIn+w1wFK!a5F; z0toJ>kX1M|{Sroq0Tg5u^hgn4*&-CAq6ZJR5U*QB*@R*|QE;ne4hdCI9R+eBkQgCd z0o^Xpq!_HiaA?xIf^`jWA**;WAA%z2T?MKwNeO@e7E(CqfiwU##|Cg97ldiQL}$b* zl>*8!zEIE&d4^Ngse?=ic#s8LUqMIRw1FTWI1pqp@MlQaNYl^h&}K8NrPdi>3|P(u zfh;}Kkx55DFIhU!hXB$YftXz^MFE|I4KeW3lL_>$%7z<}KtN;J1wdGl?$YU4Ho*#G zzY7HmtP}v(b+97BOrb^4s&*x7prUpB&X<-=P}i05l|QK379JDd_d6G!QdqoOv?hB zNHj#n+5yW5(R~SK80(pS_=-u6my4GSY==m zS>J#+!0H&B1#k+2)ejyG)0m0Lry&L_fH6AeaBd+IR<^p0S-TBL0IY8VAs6217%~r# zH!;ZqguSc^Q5uFWMCeostRfrIePDo9Wc2{vN!Ys5D~3`c##9Q0>-41tM=9%gY$-y1 z1^g$8R1WWGOy05rN>?!^^D4~sTm~Xiuc7<45W4I$s9ylx`B>G{(cYN1AdwoNDUbzq z25-^~&f3ZvoG=Lwn8A$&`w7Vz`aiH+P+)&4BEt_VWLB^X?5~ieK_xwq^nslPGCYGn z5xEuMH!H}qmpxg@g|f?sri&t1h;wjL1i-J(~9Api15`EY6&F)YZxTms4!cK zv*8VOf(#(892DWnWU4nM2H~p7SrlB^Lmx^s!o^dwfKW&Fr9?r?p$vH|NKutXVr{++ z*)?i4D)FZPNTGv}0>tuK)GH_h5@h6R5|$Su!_`7j1_eEYO@bUu0jM+$qheniP{<*a z%P1AIo@0-~Kor(4v=moX2i~j<5M)456Cm3K-wdGAa6rInr^W-PGr9_iG4Ku4Ye0rn zTr>rMvKZ(SNJ6o%73R`S0rU`-a~VKTYZy1+L*%@{YLr86kqU5W zKE2gGpbg{Ahj%an2%}*T*4}u?Kw@{!Wd-yeL{u-VP2d2ifPf2aAQuBvexCyUO~x*d zQBfgKLJ_@8gEAF{h6dB^5hew288%dq$|Tz(n5o#9!u>JO>HvcDCW0(5Bw*ovpMuQ@ zcZ^ncALa_i1w0SIiPNPqEO>CZEVoCPP4M4zd=Uj)H`NuB1A7p?Kt>sp1Q$&&6<|XM z-Wz5!W(HbXd=(%lmt%bpHCTq#6b<=hdR3^oAVP}YY>iyY%_Kjk8)tpsBi|PUZ6|C;F1#&D#W0B6!6HUcCd`?vFSif z0WMp(g3kh;G$@mRC!2B$;d3a^XQ&M35A`nDQ(=)6=fToLw0v|6>H;1Z zs$5YB-3s%x`aUc)M1BB2ixD^zYAJ#Vo>}Ne)ddGDF9JDiBDh#_M?yPfg~7oJVd-cH z-ibsPLNjn#5Byq^y!q-6&U;u)3 zVPRqdd~Z;Q@I<0WDj^qiievyO3*n^*#4J#78{tHWIlu$i0b)KN(y&~1Ar!#F3c~@9 z2p$%6mJq)kAr}CMQ=Z4I(kUs$+(MfOCaRHGx0^+mM$QhL~H1cnj2v zgM$?bCV+z_rW3%q5{MxLaIU~is5q!dIY1q39YWE#Ly?qX?lW2B?w%5U&wnr3+mJ5R&LPUScftIyefcNT>!d zBmnse5P89^iY0=gU=$9DMC8G*@)L;mJfwDFvvjbFa&^7;0Sp$A-|49;qn5zSs-SDwjj9CKw>}>fYFly_sI`e z0nlVb%*J>{0tpW29sx-PVtIWc;6?5MJ^=Ff;9|ijRt9$`m5XtUV|O1Hu1@FV0pf48zaEH(qaHK<)u!-XaAH3WrL;wzWg|WJTLxomgmY_An zD*!=R3d$mpgJt1-7-@NMr355lk2P5g94W>m0bL;qjub14z?TOW-79s#_kp#4y{ zWJ8s1V3%DG(?Xe;0=GC9pakAil!Wn1VXF~5C@%q^EJ6K?m-H z&Qk_k0uZ3Wr$CpWBnaM9Gzbc!^)L|v2o2Z@b;H(|P&1k$FADcR1h|0B27Tlmc2LqAK7Pg7`$NI`oFQylRtEIG3r|EzgCy+*w_2m^Wnc{a)cp) zn}f#TCmcHd+t1?o?^nw-n)QF%IwAP`rvO+d{CB6{)Pp(nZz8^TKW)~3ul@Y`NtnYK z_0<1Y%uf|pv%59!=O@?ypW0{SkQ! zhK&C+`%J4XQpf-H={IYz?CxT%_Ts<&Q060wwXI zf2)Re@sI4!^Z9fCGNPUj4&5FB(Tkh@!2UGXUw?@;R_{1)k+SMfj-q?YZ`+@A27dG9 zub*!>*Yb?~rKBN6zrFex^!}6e*6p*1nKUZq8nS=vH=8f`>rd?dxl7*L&qFIl{-V?m z5#Haj&uBVc-G6AAu#~rZ5}F<`desdJKo0LW9?>!1iw;Zw#=AWLKA)U)X81$Pu@aDG z{^!SN-0y(Lsu~9#$U49Z>Q$bOLmNav<^JiJ9Q8}-2?^hdjd?KLk~zO-`;G3OPx-a@ zuPHsbX9mxd3oeg8@Pne3|I~Q2`X`U^YokMyFFB;)Y%aHN&xGv!|73r|A3D6geCxH$ z!%FA6|ARE7wkLje{>jYyMseOS`G>ycX%qE4y<5)w;2yWMZQ|Y=*|q)mPLJ$NCY67- zPWv{0T&v%an|`v2h;1QqW|HM2N0k`xM zJfnVZV65mM{6c@^r466;IA0QvFF1%*n!q|*_m7LGav$Rl1?)O`V{#f3S*Fuo;D3n! 
zRAL|eEty{{sn;4IzUIn5IGXYFOnBvRc|3_9xK7g_JHAKz z@}IoZKh?rxjLnfC_FFdNJ+DQFxeon>w;QFg_lN%&*7@W8i{Hblo-@S46go=R-vIFO z$S)tB@$;p1!?6Sy9+a`>9#eQyn(a_zz@W0Xd|8&)S>?ivT zvj>~c`zyEnlf;CGClh~Z@N?WZbT`OVHKE$yCaO&|`tW7&SVa+o{P??L34aIYPHY}O zc4uke927YFcjm@Q^@&exf8khx&4c_afBR53R>5f4VPf-0aWwnz9~EN$mj=In>Oa^& zJlg)3Ay;KF8vWPMgeN|%YGVw!4_0^Xf0T3oQCs*V^B*3cAIwntJe1V<=@;}zas4*X zGQ5ZXx)RZ&lntE_MaGXvMzQ`5p!sXN!;~8%^?b&1r||rTvF@&U^#@<64 z75DFEp+?I7T(19qY@474xg+J^mvgblW+d*f%isQS?qjw_#=aB`0e=YVFVELwYU(qn z?H_)Q{N3@P!;Z=rMSln8G5&+)LGp;`=j*gVRP5gw94+->-fx*lBUU~5W|n~r+?^#dTorVzX20*zia=;{+BZ-!r1aR z21m;j|Na#x*Y~WW(V%^~(W?FidNyIeJEmtI4qK2IB-jtbfBkL}7-M2Wa5&Ct92&C1 zzqUIh*tu*%dt-^*L(6R_H_h_<7a-$Tw0GxN3;zJS4C*;P$R;AIZX>E&T!`fdyN`e+BiIe>!Dcb7PTx!w1)2m>M6w5w5xmhe)+! zRsIRwyLWs8|Adu#KKxf)r+jw@e`0rt_w#S<_~TWE#d*&lTKq4}4vReHA5Zmf;)d=v zb`C51^LNE>?zj_HXxJP5x%u1d6Ds~q;)rnIkE+mb_S-w&gf+?eBdgz;8{{U``uoH; z-&OueA^T4@yooBq{5Q+*o6n?)bp8o#!Jo8xh?gq)Cl)5=UHZrNd(0+IFHLN3EcG95 z0e`ODz5m4LjJ1xC=Fb);!ZhkL{;cCa!5B)W7#O z`nA2$lx6WH2mE-%%QO!SK;Ml zo8p8eZf5A2m>V89T*`}8W9Y36Z|uYES^CFxAyI=ZM);4BdGPBHj>85gjRqGemP1^0 zSoeq?8Vn6^h+&u;;o%>+@Pz*`wTEfSw4I2a@*Pr#5$Mktn2{NW`~ELY7cEA(^+rdu zO$>}=dADdAs~j=n)i`2gtZ#6dS5izyLPAUu{>CYmO2;84Mn?F6f|k3As`XQDko<|B)obXxe*P9F4#yy#Ko z)QWiBb-eI>rI7hOu}*RYjkm|HJI`3B^@9DQFK$!3XuIm$Q#MGp(@xi%S?u?^X3Lfw znfc9{>?2v!5Rmv(#Tby@nBC2`)%kkg%C7H>8oByoH(3G9v* zT33tf+t8vBVW`|@#{D8@QQPB^yWy9-bI!z_O5LsYK2s>n_)eo{_m(Hi386RFXUUc4 zKC)((?3qWBVPN1}d)r*K&h_%?u3JZ0dVSie4wbw(y{RpPeNUKjOUYY}DeDqn%%0ou zdd=CcCUsSd)UEurCc5{fif#xQP_4A`U$#qeN-wV$s<1fyL_7JxyO`;VErJ&<@GYv+ zU-ocA!;1?7PK(>L4LFLAhbwI;4d)g5a7b_-*J>+)1@@TEsZ+;;NDB;J$f+#a<~J!iIvp**+6n)!3jLek2`>kQ@dmU7qQHp;$i`Z)E= zR+6-eLdU?Q)5H~z$=gZc7Ms^gsy1vij@$P>Ns%>lfAM*KyM3CzvMMWid1{_cb($yq z>7iPwZ(74F#bn1_2KN|NMCDv(k_-)9TxURJ%iL-CE=eK&JYPmuw!*ZmD>tOxR(^O8 z?q=pLhv!+|;^e#WW!HMAz5ZKh2g@GZIUaYGn`^g>dG36<51GemFFqF8%Ak0MPjA<= zWA`(XlL{4=cVv?Gb(pDn)GsXfDC|_VkGX39N&$VA&go(WdC#8b$lmQY=uCHP5yq`( zT8Hk>b37+qeTk!l=Fq11;U#~~`YChk?=MT^)m_xs8nyBI^nHC#_MRgREDy;);aFKC zXL2l~!)rFhY~k1Yj(F9>?tRr>3C>F%b}g?bhj*;a)?dH&HS6q*Z^ry77Q9LhD`NS> zNGs1=?`*b|dl}sPHA0!nI%4phE@V@@#ifHxE(aRza3xvq(z|2p%V;wt&wBA5cjK_9 zk7#>veQ$Od*u7xyid-A$x$Y&*0=;S>FM-eaX)WHC=ijBTSmx>TVBHLD#Z}i;EgnlJ zJ-s~;cyqy(9u*;O+$5FWx3ya=K4mEdD=d0K=a3Z@Jzv#4Zco@oY&2zx?`MGaY81 z>Zi4Q1!BhUyl)?S>a>j~?!aT!(u-PMF2pH4Z%>Fg#8n?yESmW2D|uHN(f-!;7_FkS z*XIhm2G!5z4&8Yqe)W<)cgm>>ua}&a<-e74V_nY8;OWS%cJbFXC*}QIRg#7Ni&q?a zXJaqQcQ*WCw+hbxe8Sgy4p!R1wl@m$TB+eN0(?a#d)L`)4waV`Sjp>qRK??#!{XkR zdD>GB#Xpi--}*U+%aA1~^7W)c7d2FK?c~U6Co}|Ka5JQ96rUr!tkZokHDQ_jrAg_F zR{CkS>3p`>O$iQ@k=EZiPdct9<{3Vfl}r=j+sPS{{D8aSk=eI&mWN&&_byYCV$*MS z;#?TCS@i4ku*w3)G|s$5)pq8^&9N7Igg%M@k#`^KEScyp8WEZYb6(x*l40%Y zU&2$kT7b&oR?xr)_OFCC392x zKBvW(0}AugBpgfpZ=U^n@!P|I^v{MqJokHN&pIR+wl7?0nQGC|!mGBQyAr2;5-pQ% zMN9f|q!~8l`5`&7k#bX_4zMkAf8=&+U&lv%BSyQbMb*orf;m+hj^r_17I4-RH+lEj zbiXlm<>e(Df!~r(KM0LFy}CVnk|K9qpCH93Z7au%(oqBRc z`n8}a`)>8zfs3p4*q_erS$<<6>hSElTL;jl7muJTNpbPoRgls z&|gAydX=R8mW5|GKNr)n^DGd`fkAbfcT7Vg(0{9=h0clW`?HPmnta+zC13g1Q;G50 zHO_6#YI&us&5(M?uTUb%pA8E;lqg=bT*Pc&L*`NPG9M}OSZgo#mtNZ1+`k-bClJy^wX58iWSdgESs}; zmt?gnTF(?UGum$P?(+Pla!WU>glx|i+cSS=1j z+~R63&dVapR!Y)#wk70B=U+Vh?7$k6T&1X~wfJUrHTC73hdsA&Zt8awSYS&uAqT;;q0fGrsdvMuI1a-yeD#g zt1#)5o@uY1a84iUioE~!T)3m_&2WvVr8ne?ImPUa8!mbG`B_K!ZqQo&nC+N!q(fFz zap$Sv!iy{mGjSbz=AU-Sm~P7V)tAFwjKWh?v|ij?>5lIKf`s`svfi|#JZ8#nZ}}hP zoKwu(vZnVazxkmB>#R~6^)nRbS^8EeZlBe#mrL+-Bc3fUYFpu2{T&54hZ`in?peI| zWMGdH3y*}cantEpaSyK_@JE*=XP;eLpq*a1t}vyzN4aG8rn-XXTXT-T=zdsxo@7%g++3U@kR^by?b3B^oWm46C&#=r> z+}l6#fMaw_X_W}ygO_2-)@LpIKcPd)Jx7F(NcHped*o&-FfLDQeo-P%TDD=w+9}P} 
z&*MYp8y@z$xM+nzYM_5);yMpwhrS@$H`23e#pO>+oH;E{F*)$W%^ufzZL8m_w+BcL z-t)Se?+{v8PNQSy>HAIx7e#J(%qD@~b9>C|kQevB#kOw;YVlXSFENtiTulXpv~@QKUGHmTuU z!jiJ@dY`1|85^JH7n)=6Y0Yu30g6LpM-G=z_OyJ{aK@F1t>xzDuB}k-ZZY>zk_s@;45Y z_?)H&pSylm{FLO^ONEziaG4jW5K7EC4Wpg!pJGjUY_h)IcMki)$&r&omJ6NC&OH4> z;h=x0^UP`Yd+M5xy(MX|l^(X8Y8xrj!d$RZ-#xP`b8+38Ch-IEd!Frz3}tKHdwxm^ zjd>BiWW)Y%NxSn-cwEzTcuweOdAR1fceU*~3(-f-e1{4ub#8nTb6)PU#}VyoTrVe` zQ-(~ZRaq;rUSmA>jJP~zp}SVDq?e3Jf9$1e7dY@TSEvT%1-q`jim&K7p6iwo=YK!F zvV3;ovHhFMH?O|wcG`eBZoAEvhrMk}nU~mxJYW%B$Ln5{pxxKN?Q)0gD}2PMX>Y4o zoj9X0Q`>XS)o$A#MR_T`||%0$swnQJ&*TA`Z{m;cxr`!R#Q}~MWWQs`#A$!Tv?N1-p)DdyM+_?CH7-A z;eF@2E?lx{6LncL+eTl>g;%^8EliMc+C^rbp4*J;)Z01t5l`Q}`Enb3-H9!V+G-Kc zjx6R`2f35x{#q2w9)RPTS<7Hv27$ zt)Cq~t0M7@wVkf3W!humn)MBmtCl~D7Lic8<4~~2>bc3mn8gwb?BWz5=A5J3zn!M- zTYV#Z=1u+7%0BVP^#mWjJrT#!-f@^OJh9$J?NiSur}f*XewoLS^}+ltzxh7R7cM!@ zn^Km#Y;B#254m&?7jQ1_@>fQwb4SBgaoKpi=NI)@ob$sYdzm`zKw#`JaQeKiK}SGx|QFzv%Xs5<2BD|_50%TmM8kFa^qih zMQg=>6K;x-)LfA)KEP-edjCp|Gv`4Ue<|ni!UT1;JIOQOzcVonX?whC{kwFEz^%HN zV+B9pC>sS&cmK1;ZwlH0O>w#NW zxC>Ly&06@1DPq5Ua^?+@>rVIf86G$w|E8y9q33a>d7fFiTna%gyJx7n`c8I}OA*?k zRjcp(^z$466{Rx28=1y$zbXEMN^9s*zAJaCZ*MZ3 zzvH0Bt#tCym-FJ>BUHq5x+bqkkUku5^6k8L$d|2riTm}>i|Z;VG-hq$qt3kamLX2` zl<1tWZQj{Q%|_2S4T$zO(~FxxPzWKDJ_?9t56g6*Al4?Qb7yjH?Ne z$cX5YKcl^`I!oD4OXP~~#qEvL%iT09)D;lrVu_`U;#AocUotu(PF57kq*^Rhjg1O0 zdayi|m(Rw#u^{N?RVTYcHZL#dz`q+`C6nATu2Z8GR=>AakNCX(`coCze%h*|X9Q*` zU64*(JH5v1rnAxsJ3^V-w`mR%gv~xq%~`_7Q>JdXAW?g!z;e67@eCtoGbOYaq9OyQ78Gmj;Za)7r80Ma z*f+Z@ve@&zxY{d|eJ>v6y(yLX<(|PC2O~}{jcJlQo?SSV+Mi{h<$jIfO^<4N{U`j@ z38RJLeG#;qW4vFH`a} zYn{4VJ}a5juwHbP6qHNwcMzB6-@kj|?Y6WvslFD+QtEVMowmo{nf&b7o>iSucd1JE zPdjry-p9VjuQMl>*>F0uhVRKT?ybkxeN^I)DvS&BpLCbP5E@XNugfAYW8eOIYieyy zEt;bfIhjrAS&yQXCC@W8CM*5v1`K+$6!HUo+or71lwY8&u_b)|Dou9nkn2tloda`j zG%Kpq3hy;qAi8^Cs`6T=2mZ9#D?cBsl>6j5Rla*8Z&mff2H)=0FK#UCS8LqFtn(_A z%YPErQiEN+T=6Y(5f06gn_Z{5wyYGj$x4(JD~r^>8Nlpj@I}tWAKzGa>!WPfrejZ9 z-4vDI99Ul$cd({TT>HkF)v{ZRMQGwG%ZtSqJdTz;?45dQhUF@8=Yreu@(LWc8!u<( z#Cy*-Hjm?aC@RCQ{&3s%#XCY9y>Q8R*S<{Lbatuz}Sa0r8`AGeQRItH@zj3^#nB2 znAm*PnJhdX>yky*KQOGbWG{d6bl)V#__Bn@Z|=&=ukBv9DMLVP!LGHMS=Qg~1{L3r z+I8B=b;b&V&FPO~AMUOpM$D*U8Bm?IPBbn<;)K?n8Mpg3%6E>>fn||=K#*>pVuG+Z6sU4j=4jCT$ zqBdjd?*8+ODx9b8fBo*>)wmZLAC>akZi&K4;NGjQW2_4`97t~0SJ@`{=?Ja;x~fxg z`>mkZD6yMCogLbH!wy`qG#7jOWtCn{+PjWf&hNskZ`aZe$2F86UA*<(b%PClt!Tvu zCDItWZuZvpJ~nSt+jxns!~5DHM=`;7@!}PeX3WsJ zV|PJtns@@Y!TLa3vtF%3J$g4261ankR)q5tN&-swm6r9s*1oX8%&OkK=iEGlS{u=~ z9)_A%4fYp4eO{wEy+p|1_)9{{-7uTuhR39zRci$(?#s$fc)y0V)AV6Xlhm;S_fMBZ zW;_)#yXLk}I$R zqQCF-(|Tq{3?%n14^he0IDS63Mbs>R{oQS)R~qqSBh--^xl1G zT21xk)7c-N$M9~-IjS$~(`t9orf}=6Gx7zktR5*F;O;F= zWQqEWeo9=V{E8{|u0bbV=QS%*d$>+sNFv5gde_dd?1iM-7KZsc3A~3St;)NVUrhE$ zZ!Sz1mvZEiOwKbaESxVDduGd5Qm9Jn{@1Kpg0Dpso@O+py>hd1ez{qPn<+89cyUFm z-6=Pb?j=Xs{Y4j&Q-i;_XY%l06Uy6fw2qp2b@zkhT=}ixv9Y$tPu*ki*S8u|Zx?OE_@9D<9aDJ^<6en;!hQ@bXF8kxgfH{W1jJ_o|VmJ=>~%%UW~Y^!dc2J3Y8_qU=sdEh8)*|O@ ztF`0qF7aT^IV^ELeDhuLYvud}*LW+YCMN9K$Q?NUc3p^rB1VPM5`^ZO?#A0RCVUl3SqEf zjwe4!VNr++=G?ouL9>wKv2(4ecPMG`HJ4+Pmu=X6qVSu>nnn1W?UFNe4?8};`=C;l z^Q&IEeZ2F&72f$Q3uni0>K#rhS}Lm+8Ot$nz1h4&?e#O-dR@5H;`e__Fdjt86PvcBs5hX3;BU6*TKwQlho(N^)3dm?3Qq^fm|j$fZ+^W@mkDUaFA zmYsQb;hvVr)w4S`emY;d&r^6wuvzg!bH?Clf}1>SHPuvX56*q7-Rygy|A|j>aYO&= zRbi3dN||n_!%KZUXK33no!`(3m`oEL=zD%U+RJgQ}M%~X`d>#C~# z@db4=3yQ1O-MVz6MyvKU$3_mL+5)?a%j6BBJM<3T&{Upw-Fp7TXY$3p`C(Daq6NwC z^^@0_b?x$epSZ8-Bk{%gjD2q+_O9UCy*NfC<^`FyciC6& zhd1vTYKgY#%V%HC&T5otJ14H+nVxE~mCtgu@eN%b9qOI8muSpB{0v2A9i-E8kIdWd zTKbT-ePHLUo*eOQ`O{5a_!=2O{MN}gKX_-TBi;Som7FcXLfK))i@zxH?e-CkeHn7~ 
zw9@=|G55Ya%h~Ft{Doaam#oMbrNqOMW;{XbYR$6w&YY6o?d`W+MrGfu$Q$Wfh30MA zn$xK?FvEEYx6s2klk4{MygwY|qP>OxnFia=g?@27^734c%h#%u-99LLalsQM{riWP zmT))FSd0ZKs{}=MR-2fZ&Zyv%sHj{hrCvPc3H!HaYuY6ZPaNgsx7XpqwS>O7Mx^{*32m*~2tVOYlAc6k;5zIj_$vMQz-+AXW4IUdZJ z!8YY+$ErKcw^_L7_!x0c_F5l)%Jxa#;eh$|a@W>xBF?$G^6C8*VokX$J&Odgw~D1! zsDJ2;vt&DJwd<|e$wjMPnE9#(wj%peYaL~tevnn$LtOGyeZF2?$QSQ-3lojh_IB(( zE`HUzoNxSv0d^ZBj?wX8ik`)!m^9s*xK#UdS+V^T|Iy+{#Q2Xx;sZ zQ$IbWcfN{}w}+pQw|>^^S$^7SHCAK}*UI8TR@2QveO@bhbdTKMx#VcR*y47(*GpD~ zSyq?n&aavmTL0v_ulu#^1{GP$1>H(%TV^~7(>?nJFE4A_xb-$`0-s;VJzMTVJWndC zU03y)(@Mo_On3FtjBL%*6dpI;ezN0qnc&AeS~XpaCH7M-H!n#w+mbt3Z{Y!>zVd*~ zNfLKYT&cOL?A`j-_Fe)ne{Z};EZ##OkGYbM8C#!YMwwPu-;I%N-{ zWddm;Iy-l5+fmKEVp&cT_bY{9@yczl-}(n%mgCpR^wH=OH0Nsjl&95=Tb%h})}FYp zSv6Z^cf5BvR~bXCV&B%RIw$XR&g*>50P===o2d~llL8+kuU2zQFjPMqme9GUus&nv zb6vrD)6ViMlaF;|>t0t{A2EB4-IGZS3TrL&9X?5CzQ1hO@kT8&S3t*q(yJ|-T$p!k z+MRsafH~=6%EmU$cO_gz)*8=i8gk}e4{o<#yq7^1m8_d4pTk@eCVrW@GAQ3eA|wbY_zHKc9}{=?quE15Mj0>2xpQQ%B_(( zwTo~%&t8q(`HXgG#gkU+x}!QiA#CM7uQh1*PK)ESE)y z?cK3|(c(78w*$*Ar^h~Z$gV#~IdE{+X2wTO_B@2(2U?pso)^Yco|L=l?h~0mHT}lx zS|KMLzG#>0j<(`i30xhuieBgC%J&vMF6?c%#1zST@szL!E^hzUf&isAJ@0e-l)H|Y zoRubBye@ck#mu^Ul+v{Xf=lEXEXJg4TCpZ{#xLiIB2-5l&B+hcj#OU_+A zuzlI5&>Idbu6~|)C;s1b#jT*qKb?|UuD*`WtkeNwNJa> zFP?s9z~|8}_E(4a_RLeCVQ4)q>$v@ihz1_#bvIVA_==j9rLGN-tt$D6_WN_OE)*9c=J8-@jAqfLbEuLDEgLUdp-FS*v&nTf}Z&_~dz$hcjDT(DQ01 zF3$V*2DMGpZ&ro~+~^Sok1NKJt?1lq5x|Acq)l%gYwE zpJq!(D&9>v&M1CGsaT&-Bes%dOS^)5$zBt2b74h~qnv%GbmJ_9amK6O>6~{AX#BRc zvMj@>Z`a{TMl3Om+Lo&Ary zpR}KQTwLxncYUCr+Gpa_9^{+wMmjFv!eVXsED;G4>AU79SesLg3ppD8ue*A^$+T&bZZ7N)DQI8RHc_t4dnA|8nglptM|G^rz)@;K4dceO;c+1Ci-7DMhFEbv#xQ^LRvKKDzP+3GIrV9=n45BZqfM zwo@VlVUn|q&+@{Om#-K1wo7T&Zc#kvA}t)kDA-Fl8QE^1?$zC}U~6l|k>ddzBKa+o zy6pD*i=S#%tTOXno3i71YVEO2%gBA7XDwd2>4fzRQ+=&dbLt|G=vVJvwQ`%W|JwSc zryK&0rFb9R%w;o0|8s-aiT30j{EI)$7qVrYezd#dsYb!B=L_mJZcRG;)**CexJY*7 zmzl~R$s#15%A$9XZR%fFS1vjsloEMn$J(!zu`XMimewn%eNG`2>T^7C?XwqIxvx^8 z=tCy!yvs>1#Vre}xnA)^d?ttbQSPsK-eM+{QW_ye-u1L)FQ1kXzpZ8GmqzlswT+je z5^qa$NuJsx`k;~P(cC17(39b;1j>=9zAj_c&ZV-&EH&L>iwBqrYu~JES^UtmSMrS+ zsXx4*+{~8xp2BElwq(f%9yWoN@UqS+j!o4%LWk~XFE6-6NM0RUCZ*OB!una#-Yvps z_DdU4rmE!K8`9q>Zg&h%P7a&DvShl`&5aK;!``sEBHrp3C+*%Q^yd80Dv5{D>@fvo?Ci9 zRl`bfkNmU4hCYr<`V3O;Z_?G7A93D~f3`r3+!cjtlCA%f#+hfoKDaM&ER|JSO>J7K zMq5Zj=#68B>h^KT=Wkuq;MmY)z)<>THtj%RebJ0k+saq^eMgsESu7PBGMB7zv%b$$ zIN?ls_T2+Q?yPn7H?ALMinrH^jZ;DAn0hupBA&^b9aVWhY3D~r{k<$t?f4jX8F!fY zZ@qX~Y~51UJsj(|wH;WSCie}0>Y;?Ij`&p0O}&MSUj{5=oswGi&W$0YXi6|gPPKXb zo1$=Eex_+Rb#7j8yD`m~{kq0(o2jL1BPv_P5?i_-+&#FWjTaYq|9=27K+V6mQbZymqpPJv89F*R`=x9thhuiH ztLDujK{V$2wDZtMeA)si4twg`B)#KnS00)`JfBfz*W!kh{(T{@ zx+AQMXLLS;`VWi}X2UI9w7!&tC+cQNgI5OV(}TodHJFQTW*~|{n50px{3q9<4(DSc zOQE~=N(m>ay>-p?nG&+6n64})%ssG7CWlt$5pF|^eB~BFV{j+2mOYZUV9}rRF8R#7g4K9nz1`2O$uHmmVPtJN)7~gE>J*d8f7l zJq%+`-6>fU0^Kq)tQbvp(TX2K3IMkv7ppZl)_ebBh{HyVz7WNQW!g*BL;HFv%gc@Xg96oQrm5Q+z)%V&-*y>K9SS_<&t5IwaZnR9GV2epKEiV77kCtj zKlmfrS|PC}C;(gGL$W>5{JWzcMQ1Y8Z39?uO_{Q_RoqlK<-8QLD?)Ut(O%}nrW9Cy zZSjB8;xO8q_tlg6S$Q6{d<1}sMQw53=`SC@WJz3HL<)_NlwWF-xRqOfZg3WT!21$C zSks=JXil5Z?lp{AurQ7$!~YTN2auE3?@Ja1wbE&44hAj!W*YT*E|)@}MY@=6(+b`J zB!Eb9q>VghUDG?JUma(n#cE;^KF#J9v+d=ZtJ8Ti0XO?oCMa%=L)-=?oDPhqgzfxOA4vuON4`E;8+zgCn2pwXGC3)<4KcE{5x3wUY#y#=?bq~)Bc=#ET#o0;`z zW#_`&TAWQXn?Bcu7#2XW*Gy;^1tOKA@F zH_8T7$3H1}+!m@r>8qA{cz%-CwgQ0ywE(?gYDz@kQBRkpqh0N2jo(HwZ%-~dRl@I& zi0JhIa8>Y8TI_%a1!!Z59q5!`at{Sx$ocfu>^JrpHgQOF!|LdoA6G_DeO)Dhtis#d zc&XvU!64; z4wd!cjVMeXnI!rcLZF&)bjpvVC}*^^wglLsBOprkpwrO5A?O*^;tZOg{X~n$b#M0) z3D`O=kjt3Q_1=>&^ZkP*RT6sjh4)P1cHApNpO_mIb8M=R8%^=T;GyO3>1vL^tlj# 
zWGMyN!yVn%yUi`8hv{-y#vgf+<>Q)3@~u3^gT4cP>OJ2ly3GYpg@SlUd!-gS7cY#X ziUV`MEYm=l>B*WmhReN|ttNsp?YnDb*UF&%*+CH#7^|_QLi-1+wcC#zS7FJLvY~_f z)t!27RkVh&5umEe51Ov>!TJX0y!EoXY@`1ca4l74ezx10Q7o>sJ_-+|_%xdfBP4zr zN-I^g+N#rcoNG9K2zM(Nq3|LVC(E5XvBfTh4Vlm4nejmN2{beTdA&aEM`CR!B}wVY zNbAx9BITv(CG3`vV!{)Y-z`UZ$0>4Q4Ej+mIyitWYg!R`yz6Z#lqYmZi)i8((fL^1 z!Qd!Clus~xwo-K&^Qa!@OLq}NI&AK7Ru4tGn&vtpIVlVt{me}KpFc}06P*As{5pyY z$ZjAy#mn4clc2Y@ar0Y$QMe-ED728t<;&ptqD~S|PtbCDHShJEOXg=&8q+Wfde3Ay zMfJ%Chof(PG{7;Myf{3Cf9MoxqMhgNg4@1`gg|W$Y&uctwHdWTYT4SK$dch^0me?4 z2~&8sGgfu0Hr_$UG1lnbafudL*|F57q<7jAx4F6=KU9C=JW3}+MuCa#Dj_20;*`pd zgZzK_LJg01O=DsZyaS@rvqDuV4Wa?tc10C|?%?H$HiG|Rmf{W6I~bC9k(!!d9gO-J z_r&*hkBBfZwC=Rjf06l(Vil23T0IqLpt{B+W(jhb!N#OJ!X*d{Dw;M^M>l0yoIl~l z#LKZ(SMT(%d6;9y`u?x=-EtXv)h)QRMaK>?;Pv7m*}~Qg8uK-nn-!a_>bdRAm$yg^ zXjWOwO`f~MH+N-}UCFn|)eZ6$gcJGeL3(_LbD}@f++~w!RRGH6wao?$bA0ZHkr1ke zeww^;psLo_>|!C?Q~>cJw}o-Q5DF$;tta8M0KqJp-v@QJXAT5~GXui$WjZ2-8|RvMrkvhh{JN+Zuk2qX|*4$zE2_{;M^Ar2g*tD9tX&uTdD< z!&4|~t*!vczbqohkkF!4StWFCGrnfX0R-gPjO)lovLUZ8@J&_$wZjx8sdxwQmc3;U zTFuyR1-XZ+WRKgfxzvH)lA%GwVsP+cXsioyVD@6pFAi zjBBV>D4FVzm;o8}^iD#&qlZlt-6|*}D-x0e3W~DfbgCK?k+sf!$XiUI>-%`qnN-Ug$N0E zlP*6&f6?z+;#@!}kojwGlpQm~t=BpJ<$1egn1J9ZNxv}>Ojb=Z(0xef5u`SM_vbw4 zC%n}DZ;7MNwnA_Mq(>!hDI7)f9ha7RTJ%b?SVllOr_>IfWmcoZ6>hx>!I`cJJUq^l z3=ap@y)EF&tIzI9(8+uG*2x!N%1Va1Tep0UR8De5T3bShT&pSk412;v^h-v={)0>@ z+wcV{(N%4LM#BB-Q@;XQ+NWu{H5L+74xiALW^+V;KX)dhvG~Sis2zL{8z8$qD9MWj zX~R2WgRlFir)ag0Iz$+gp8ru4`lU=$_h;h#F)O5>=ssI>-U0=f- z_?r=1bD~`H+n88WhQ;aAe183$ctHM66y;%w>j{tIql}}~GggnE! zcxS?>N%ZDiJoP9^0v$W!%$#xP{-|ulfhij5)Fpq13Y5H(Plf$ zcx?iWS828ZLNkU`w{em9|6pO<+_T=ldphiepb3N>Ht3xOQ)p<$u>p|tFEfv?cZ*{A z)=*|+IOY9ntMQo`Gb)F6NKt{9Nd#26O_mknK@YRB?hvYs^CH=bJDR-~kYVEQ$>bwj z6P%bl@^-#_z}VP&&ui~a2z#BED5rkG$PHg{uJ$g;!T>IbuBny7s% z{0%&`A3AUm^_RvD;2ws1cA`fAkG-g8jDLG}|`>@57xlkXdSr^3P@9i=`RQQVcYCglRN~`U>9TGzAVSGQsnQST4c!)Eb^f)Z-gpeV2vaYy3G*QE zitO>&llG9?o^rw?4fzjE32+SGI5|{OXDe=el>#Xw>8&IwKAjKI&Kx(lxClYr_KcBi zfp(D_c{1z_9CP3o-7ASTL^2-Vm8`uiUL$*}I$@9xaf4CXG;Owp{<1Z{ng<6r|6#Z( zaBpQK|AM0*OYu$iTNKyH2Q*|a47Rs<)+#1Bkm!9!9()r+`lVc$6uGCX@TDV#M;VVk zbbumv`BGPT>QgT;KP%BwhIlYbRR&h|&S?V>zJ9p8<4tI~8`STSwESkx0)hTx>xsWS z4i0#OOYT$R@5oJFa*`hJuC^9lwE@{3Cy$_j4Jky#ehIMBlux#r?EE7k zf_5-Wp_>c~YD#Zg2m>(K>g^<@!YPYag?m=)V|qhH|1|5^iQ$&(akx=)U@65vYH@BtxGt>yXQd&FO-%UlGx(>e~YeUUZc(3a}n08Sas zmV zJDhu=w|}}saKfv#pHQeqJM-soEDFCQXOQ^cX&i-lp-{8msw7C)DUeGMaSOS_(&1u&O-XKo%te<<__YSbsJ)l;UD6&ZxG&|b5{Y>9*gpar`8%|NKt-!$HiKOt zTLh=OJ)swCv1vK%N_DXjeW>1z#GavTwk(R%v7x`8Wn*iGxecB*i!-?40tVpgZOMgr z3VTm?F$>;UY4IaH|9J-kLeiAjSi~35Jiqv;kN{47=g6o?6b_E=9P=WlEZncG%i*Bx zuxBUIyco5ci5sYegD=Dib@B%U)PQsqtlsom_?TK?2>5JX16{k6n=!oKf=_}H zm{K{SPp$smcGuZHpLC-u{Gkoc`W2fEg|)C_h-got#<3C5mveZkKAUqA*gS! 
zP zxr?hx?Yz(w1&u~vNLb>%0-I&c-ON{;^5VjvL%6!NgI=lbuS~MyeZ<7R_8yCLM}Mo1 zgZgHlA~mx_sazgyeHIHZRHyLUb<<3^yOwEu$(k6vmp=NPBqi!UQRK9G+^CE=3&ZZK z6*z5a+6nwxfoz}d6keXoq=nMl5Y04K(+C%soYGCrZ_8{#dA9wLb5LvVJRy0WjOc4> z;_GQpikl%k&eJ&?KunHBvCtlgWni8gcjUAA)N=?UV*!Yb~)HT_a0&^YN#7w`Eyd zZJuRzXI;k@zyYi_Rucv^elF<$bZ3jmpNFOb+D%Aur|(6~JzZ1+qeP-}>r84p`0{Fp zMyHx^y2AHsm46ZyvLm1)QhnUH6BTI_gy2W!1k*;#dr_V|>?>Y?iTWI@d0p%2R?67m zUzfmF__QyZSfiHD^wBzOmIuKw*{n-!Ph{p;#DIdyy{{SA`99?88lx@v@c<+J*xxbO zR}LsS8@&q-xspyh0mDqi6XT{ZM&nw8Te_^I5J|q$x3G zd$52#3|&Pfs;6D<+bU^vfuPf{w30xJ;r~YKOAh-Ize@#a?@|1bO4zJUUtdC>0nQ{V z)vjqu2Vk4Dg|$qivn**ZJYL)Ise>tDf?{X_HmGl=aA!w~5ml_15)iU;+;N^7J6^-m z+``~GjU`O^Ml`h{XR`iZJX2x_Z~7awDyhQ76lD_=AF90uVVB86nJEnn7|(Gq%mv$A zsF2tc2!;m7Cef&&1BJ+2wgf)aj<#KL^d)53kp&~C4urfPCZmVuhix*|coEtJHo5Dp zF=%hX;U0^_B?xNt5;bpAJ9ZliwK}%YOXLN@#?@t8Hz#&b~3XKeE)Zbqt zkXoQQB~(bMG2`~>7wy>3+pfR1oyYCbpWCuOw_Ja1MT_mw58McM)ZP9y5ihq^pO6N2Zvkxen(&Rcl(3rR)mH~&z@5>e*+My{ z7{#0w0Ag)(J%Z{4K@|-qBVCXRur{mDRVSkG;^8y! z_4Y0q6e}*jBum^+yI_?V_@}qjRKg$1hiO!koQH8*8^<|{y-^3A4Grz|!^if2%RWx{qpbAXqvpTIz7E;;%yY_0&6&*0TKD}l z`Es>$oY6&SY_~gBnQhKJ&VONU4L95_lAgt`5;BKm4`81E{nY)tb_ zNtBh3t8#hie4Z5%kz;yw-_=ccg7J%^`W%4crqee=`9<9F%XVI43nS9*h+ADr_6#%7 zxA6e>=OmaFy&SDahRK5pi3_{WwXILN&ju1N$x*>X@D2hL*PZhZ-26dKq>%Q98Jyl| z*FNAa0RoQgeG}IVf^}=VQ8lZLc^$*pqVcT7!X49QvRZ1|-9=;%n$EpIX0kbr0W++s zGs=G5%Z~Vnb78>W#n5Yz8Ga8Jb118KwGlyRg4w1OUc_j8=77vswx z8d~ni0b+GD&X7t4_xGK?uLTL~RW6%Hg9tnfwXrJp(7CyDyopwH_Bl7jF$?^sMaEMc z!N6m`Gu<@Q(F==mF~w|u*c7A@kvWji!{YsI6l4daBbX#-0bT{;91xEGXUy?T`vfH( zG%H&J%n^){u)?I_<%*(!q0T?QKN1KgXG!!*9yNs)_h-4ZnEelf7InvED)oKu_1myb z9E;k|#J6RhG7kkX^;Vusz4bcVCv103^S@S)5VHsqp5;L+LdOrelDOZB`A(n$$eX{% zu{48H^FdBvR8IIP|5|EBO$rVliM9DsYWcTEAf)0lB5d?Yvc}oOE>62`_yU|h$vR-o2^9hmXt=i`M2aHsf-%;p(BYI9VhxHLCgi3lpULbmkHZLM`u3*Z0A+A@QvsWe5m&MwU)6yi8JR{^=r_9$0^?FJm@uoZ}_d1l-<@8>mL|GHJ)}N>aIKQdKmDwUh8a&|9B1xI9qomqLQC11m=4adP4L~v1V6y}$=q}2e3o1S zM=63Rwr#>*eDp*G=wVf~Aq@(Ys5M)&=W!RI*aF6~OdFDFpeG^WbMRzp{_xmrWZ?ft z73s;(WB#3KnxR_GA8x;@;oLB_v=RARTZRRKV{nh?DHJc_g=-$w)>N>jW8e~wJHg+XUo%o^^__gng3eP#y8@o=EH`@s-J!R!0uU~%`iV!v%DzGd%suR z?#yN@qE!eW*5e&Y2}mnP)2ATm;JvU+yIQS1Jj;m+7hcrqt&;lMxMOrS?LDz&;%~DS z-yIRc<*NBFOqcgsGp?$e3{-;B!@{%RS?RVlPHA#D4Abozfef_YJR0Uf^AJX>@`I`72cu+$hl40IECJ%T1sJh38gZ#un`l4mUDn>1NJ3I%xNR zPPYw1ixC%=sj=4=$tCJ*6WNDRUf*$925ii%+DpVYMdp1u`h~pQ*KlCG(Kd&Ixv>y% zwLAFFU)lQ6#cldPDH-%SbfBpoRJlB_u)p}PCU`g%IX9fU=@y9`0h}iRFfLc~7B3ar zmvOSqPW>Xrd|tOzXsxFlF(s1hR!jT*@)i&r?;C&bQT_W*(5aY6^@&H5uG!J>rVV8a zHFZgEmbV7By#(YSbu{6t@)Z$FFi)WnB{w<*fP>JnugKMqjoUy7P9YRGhd zqZ~Mv+CuA?O%`f@2)&pR==Tc@GmuGF58PQUVTbij3+xoctOT!>Kqg9DfQr8uM$3D& z_dnwAo3Ko0M>O`}$?YG0a1bEFZvkjoK4`zA8ebl%Q8Urc6 z;<-W?qL@nTrUFJ|)&o&a3;Z`=CQx5j7>=;LBV^6Jc5j4snqqeaw#Z%LNs05d&%+1m#_#_0tq1hvuuQ#z z1DtI#Q#sAgAJBJV?y7{E?w5*NixAadkS3@P`)uI#UpNra6`gao0N35w{bQ`i+ML@FY0t zHo}|2V3Fj1Gub~)c9QLr44rVJ~#<6jy*Y^|R3&YB30 zd%~Qa8js{Wn6nU2(cnl=0pEwp#N2OHH%EFho@m}U95GBcahMfzRZde51vR&xb<2ZA ztZk!aUN9Nn(~%76r>I7YgC1tXP|_CB3USB8zZI$`+}c>{e51jBrzT$m=r%1WQcDQ> zvCvVii zpZ4ird^n@pdIzyAM z6uK25cg-Jk9WES(2_-L>Jg*6uqwUrP-n?`-_`gsXR_&xZ^ zf5@B(d@WFy{7+W9uX?Ak&i87b{|bPk`FQp%pI8{Hwwf`aD%!4g0>=xA7@9EtBwPWV zy9h17J`TMAO#X%9)HQEo><-R*RA)3}rCRZ`@hkWF;@auKLby~hw2>Qv7n!DnbUt6@ z_UZTV+uz}||6x_<@Y5gRw%@~MuePnn@X=r4uK!`HKf;vnb0ew>-GzijnGuyf8xG{x zREh&ewz`2IrVGuwRJftC1qZa{G3!I-XA?lZI+Cs`YVQOYiHM2;sdh%52KWboy&wys zm(0$WLj%%NVC7yj=GukYTLb@LUlJHxGi;&ZW{QuanW#Z`FhOr%|0N64MyPaJ5W2vt zOf5#>7sVhW!6-)X&pBcs%8uogku`{caMAQ&IfO~;>iOZW^?1K##6Drj0;Jg3rd6Iw z=y_!L8TlKKV_-@=wVKOqCEJ9591C{UzCcRa)ZOyhn)^6EYaEu~3eq;f!>W7n4I1llfE7cMESW 
z7}tdwagb1v12Y}9Bdx)zS9?TAs8Kq0;&3;{eu8@jOsFc$W6O(;BddpPv)8F)4}r{J zdH-j8^BT~F~VR&X9leKS*GuYm_U3Ih4t!H`!H#S1>1GvO)N z<*p*!m-T`v_<*d6xL3oZ2H9L@XcMHQlAQgx{aimFH0&fp zpf~GX9^CAg#dtn;Zq!gW`vzt*qbb@_bx;xZ8FBT8zr;L|=Hv|9qUK5{5MNGHg}W}5 zR!J&3$2Snnxz&-;;?VxlWs=ZI0a}-=PcCk;31|x{`e}Ko!bHwijAxz#Dy$#q=j#7J z{{G9NzM=nfuP3o*W0!?u6-VT>pE(^!K>J6DlNAuGKi~Q5w+Tns5*$1Bc$2Bnc&X1g z9WnFoG7|=VQMVo5!})5NV(&BEHJ`D=*kIzNy2ggKUyKuowwNDS!x|6|5G2gLv= zGP#Cd#R4CZj31dY078Po|A3h%)}Y-$v;;6Hm#wsvR2{08S3@A0Ha%#^Sg30%r!xSHf1O*2BxPRL)Jq@K$j^22tv49XU=hKQbyal@s7qpfFie zUWeFQ!KbPxc-~dVzT1i59FPfueWez9zv0LBT0XeM_<=!UOym|JhNgH5(gNjOrwzsm zr4XKv5d2hB<*9$EK^eD>8=CF!NtRy;C2~VfaqMnh7#a@RwD&UDN;QMqd6UV5n6BF9 zgDWZ%0$D)mIUEd6j9PN2$i6W7L`PYh>eaiBpph@;Dqr2$+_tCI&j5Cu$+k+PFszN- zGScD~N#~gVaYezZ;YC}j7IB+fSxBu9f!^0-k<`b&QpQI;dUMEc@2Gutr}F|7d31D9 zqjDb{t%(P!!nO6&m@~^(P(~q`-^x5_U+HOj-^GNEaPBrmf9X@thF@je8PO}XR+F{J z3zz0A--m=axJO(H2^mCH_JHX!GELXw`?cwCSVN9`!vcfj7=d~r>==sI84)X*h1b8w z(wLDFUIL`WtDinVvjnflcSoFS=XhAzTIXoq(Tdf*b@mu2@?~Z)&FfIz1zJ`8ZAjbt zf5lmB{;7RQ$IQmqbAEs^)p~QD;zl-4_R@{&Y}K`uVxIP4h0)~%#y1PVU+%UCa28Y( zf3S{-)}=vBsuOp&;w_BSOI|hdcbr>n%i)Q^Wjmd=GvYL2BDrVD{U6KyKUcc^JHe9f zE@7IIGNva6DPs@~s@GC$fOXY@{*Zu#*ajUhSr8y$zGbrJop?BhcY1_v{FY}@{+W7#S-6t{0V469UNnBed2-f z$mbDesJM>kWfyUAvLPg=*#zD^f+)6UFj;7H>`SlBd2ssLitPbN^n_(G)fr>}T@WUJp$L(|mnY7@k{s>TyKZ2- z;?2wss3F86oBD;CDq8%^K(ySM zdFC`63}o@0G6-jv3v(vqlPFbz#s{E&SO15Hdkeb4s0b0jgtSLmW}dFbCWmzv z3cme~YTRKeQ$_RuWC>)9r?QFdcNc+oboh%^5@fm1f9alR6VN>bWv_Q>;Sg~jD=MRe zl%C*}R2CNde8`N9vkq>i&Vs_`KRTTt?biifi4meCw{MN(LJ?NypHOeN@O^RLGm{LXuH11{T=l%d5(xO~Ajs``DB{8x&G*36bBRUCe)hos}oA66Doj&Z1M5BN~0I9#Z zH3ZR1Dz2`ho{LlI!b9=U9&~fa3Vj2lS}{`4Q~eKIu@<&Rk5fO?JCR@{B|Hm|rO1HY z*@3IwbA%kZTG(s#eLJUespLk7!h)G$tVoCXz29-#)hR4I9?F*%61N%ELM^`B=mIrZ2ymk`HPyQo zb6%Bf{(|}TQ;`E+1!BnX2~ZUom-XzrAR{c1B9nj0-a#*AsjA&uN`c$g(=)plTSa<8 z&LB`Z$bt7v*k;q%aouoE#8d3|eq%}}ftyZf>>cZe-8Bim{1I0a|7lD+5N@lfCK^kJ zcN{d=y!?HIogc%P^i+UjIBwc~8lmu}1X>NMd$Bg;I8p5q_DrcrnNYPv6Y8O1(*;z{ zlhiX6;P4Jsb17iV-}Nmr8A6obb5yOd>H?K6h_)$g273N~wy6q1=(z0y{SC`uU#3H& zB_1x^zd)~VNE7f3^xm+UGDt1^EGDd)yl^}qS(?{+r#tIpOqkF~qbdDcWUx2F4Q4$^Digd%MNV2={FIPQnLR4IZdK znR;uIRUYw&$>v+DGq3siD(P+E+^x9ePHc5<|5~h-b9uCj;0q6e(7IoW)yTLc>!2m0 zZ!3#>8H#I8*4URQP*_i#DjHta9Hmrxau{YyiQ&tb;5H^XEMFEKqd$mm$J>NB2(-D^ zW9CedMv%UtFRRC&uyw-j8=Ox*QG{P#*~CT-I+qqS*M-DTD&1LmD=?+Tji5DL95S1n zW(JB7PYj24$=|oo9_jXdC~-DZ-wJ;Cid|dFy>;gCwWOF0{JiYRGHJj5j;7a$7?u>5 zntf45ZZmT|0|%^tb9>)X4=Y#Bqyq~J+Hb^+#0VrlPX2;_cJnfgNd8hSyfrY)^DT(q zhcK8xr02EcjOW@2Si?>QSiE!@`uE>6#eYM)cI-Spn+wS)jI34=UdRo~PNwMg+Hrs3 zfBV4tL_Cy0k#p#0(f+NgUvC?`bcT{3R2?<78Rbrow<|3h-X4>z`c0;-zK`c~p|X(XaFe7sSE)v# z6g)rOs@+d&C2YSJIpoE7?O^~~e3hbaw@v;3ZbI;B~|P*lQ{>04J3 z9|6)n(?;IO{W5Qt0OI}n%;kkA4EoUpXDx4ddKic_u%+37h9BqE%H%#Z3IHvn3_@y=wE8!dE_3JF z^%0^E7R(m5$d|Sw&XtQ<$bB&SKE50*I~94d>;VSTHn$k^q@GBCKkQon^rbU1C3*x$>T^V5&twb53P^S$Dou<}s)EUeg`!>Z@i%qq(O^>(qh2{)N`{hwcq4eR{+TUnT^vOes?0mZ z7U19DL&u&-y40p53`_fbx`ZB{HJaPKJ3?)*Cq5e+ZWDUGtZ0)JdzoR$#Vi3;2hI{M zlgH=lgXOw=ejt9Yz*t9wlNmv=Ou&`&PYUC;r3$$o^CX-XOFKJAanlJ5VZFibCZX|( zTsN2`m}QLTiOcn_G2!Z6{R99cg0^YJFeV}oMnG_sx7%X&&;Us1_2tp1Jo;HQk8D@1 zWwbOt80K$-q_;O}(+BmD}B&H~g7VOB67eao-mIG8pl6#i$nS z{n$!IrDaE?a&+UGZ}HT18VFYc1GriQbd_s`qOoW-(*E}zz~v%Tgf3^&ZyOey%NNaN zmU6Lg?xiKaAJFox4e`6c0B11XQ#!~uDUBtgF~?{yhVrNg$>V=`DcV&6;jQvsRtzpK z8~7@*vLAB&t_Z|nh6_o2Yiy(qOBH z;U`XLjWW`BkdO_tfgv@BQ9gDwL+lEqgToBVgxSoEmjsO&liukdBsa#?^uldSxmffX z0$k?qow?)_w3?D9ra;?w;PfA~HZTzV&INL<;m2}z2h?33aKVW32K#Gt9NZ850E=QQ z>w$5%jYFK;Thj>%%LU3Xa)FIp9gK;L4bRTr3+kUn%@9XHm*3n`E30bRn~E=(QF|LZ z-YWhw&z?>SyZWbZ0uQ%DU<)`9{{o2iGp!H)pzYv4rmYmu;YHS*{k72iw#&g%)NL5$ 
zpgwUNzFa$Nip$x}mbdd+vf_34$Bhkk@-VY?MEo_{>vcAWg`-&V4f3O z)EIcDN|1IkL0@b{^OS4Xx1UMLx+~c|X$m*Yao|WEm2NN0zhM))1{Vgd(U*~kw#80d zDhuozcHQbS8wP@%9ez8G+?}3VJD!8+FA;bG(QXDKn!15?Y^0ZNVZW6ng2Mz#U5t^O zih;*)wb<9QvMpUAU-Pie`VU0jk!C9jZ}*me*;*R0%*Os=1%}x-(iw~(fyLYvqOSBA z^qgvU@W!s`M*tJQZ~t&n)E`XA8=m(F|A568%L99*uyk&liY$2`0<|`20u=89AA8M1 zQYr&%6qfLB%QP0i2OQ*K=c!0h=1tlAdNvtB=goWKq*G8rNi-THfAiFTJDwl!QRbEZ zDSfLI_UL_A$XbV5_|QL@OY23lD(IrkM#b}|y<5dN!0W03p)m3=fo(%2x#0=Q_g{D? zZ=y?z6)ZDL=0dK`Pb4ciNKxLXdgSeA#q8Aj%(^3zVhPk7F>wH|u5`Nsfr5HDiI zqS~4}TN0CESeqKNq|8*$c_p}4d0{RD^#L4@`kV?Vi!6#>*Qst;> z4k;VGD<W-{=uE}Dg*IvUPg)YyTkl(G>Zw4S4^ zK=XV@7S%EmYcHDdY57g*L)IuGdtZEyrgzNh5J#iYd(NdyX!I){qkaOcowWA>`g z^bgmVFTv~vUSq7Dj{eJ^+ucF;MgYT#P~ivBDENB*=$dwdL3A3gita%Sd$5MC_{Q%f z8UmqAb{B70o4W&P-VAW6lQk~t$Ap{XSL{P(()XKp!N#~mUL2eKw>=UeueX#7u~^@_ zU5hmT5T10)CL05@n(1vm0@IzZ4*<~|_f4GiR9tabZtSDHxwPZLJ(E_c7nDj)OC1F_ z%*mf4CH+ca&rO6M_#xwt(TTP2#gw`om~aZv2D2mz{Hp&wc+h-6a7sGV{Wjq@#M)mv zWsj^ch7XikVS*g@`cAHlaRz_b8!wX7VP+hP|1d|IVw zH|hSAj$y{D5tH}nKT2!saaZmqmqsmM_8v@zM^!`zi$Dy&5Myn986J1z!fE;$oAV@3O99qGTv2&vX?vd48)l@ot~ zk5^NWXN}ZKuNzJ*@&1l)t>i6uI0B?69I~Hf+_klJTi6Gf|1hYeqn7tl0wG<;WcVjr zWcpJz{9{bSe8`~&4}_L{FYnv2dx-EqrRsqiN13-*?V&?ld{1F=z^wOOP~a8$g~{(& zy37|E(YNLH>KE|atL@xt?XGhBbxZhewf5~D_SU$5-8cQaH~V%6_SB&E3E`*EUfdaI zl5Eae>u8T#m6ldMI6r~~+wnZ6c#($ejYQ&^ck%JWP*ye@h0^1-X8%HLmwA@|cp$m* zc6hz$w|4#ehhqIA3OK|-Zpz;9aGnKn0(n(#5qIzc29&6e4qxE9y8~bwozNGELMoDi zl)%?3>s$mN^7v`Z3{QV zb|QwmcPhBkUd?GcK}gka?N`WP-DJzP+0e&q030AP=8| zdCSMKAmG*LP!i;n*LxpG%Qir-PRrC2{L;(47p^;5bXDKbu+wV4P&*Dq{pGza2?_1E z^QrW}yh@*EvT)mT`lH}ehOgeO{j~b4p*fD(XA=9&`gdhEyS*5D+I{_RL=}yvaV(pH zq37a714u< z;v(!RS?atKqlmZ=#(ZP3F*FoQPHh%_L&pu#P%Gi`c|x|QBT6K+!8>deeLXn05<#a0 z9dw%MLSD(~PTevMlF%9oxs-5Nb?j z&TMWD0k{~`!s$2yvozmZ1;#FDPXc=6OlDCS=X;lUdjefi3zS{HPp+tgI^M^DkA{YU zQB(SbdWo{W!y$uEcoRTOH;YPsoIXKJQ0%VziAJ95(A3?vKEJe+XWJ4cTJp}5+M?Ux zK%q=|`8+bEx{C$^W5z+9U<{<+f%4v6vM?eaUaWhUL31;H5a`qGOp$p1hMrPoULej! zfI3BoHRF`DZv*+_UzEuqh?g6UO(%0hb~y86R!MdOV8}E~(^4k*0W2s> z6dmBEHuIJt0k5D|1)UDFl(55@gBl+qqb#GrXBr|EO*c0R7iJe$?K}675Vf+ z$8B>3shsG=?jDA7OM;_#7v>oB3&#>uk6UVx$zoe8%(B_-NSZ=clk}vRG-F*#%rgGv z%|1KeATff7Dn08Tu&3Ub49nuNcAPLTtHW>HV@!4GXjE$usx z#E9VRGOB>!C(NW7dn}cSqmXgc^|YYljr(5}IwtJ-zaq=%tsMFcF>q3>jhq30b#|Va zkMu`{vE$m5Ms$EyayRp+ojev^YfRSuzV&Uitn22$*ZP&C(-xna@I%hBRoRxgc4GEV z#Q1pXFpKyuWM4S}2R(67V(gDOl#3HSU7r+;Jka!jHXv8RRt8d%Nn=N%R*(tA`{SOPM?>FLR>#&agPp8a=&Rc~7Kt5OAC5 zw%)}&_SS3AQOB^3^UFNw%a{FSIFgx*gd$+91}?7-Go%hq$1={ciPE9QiG_NCjW6iX zFmXmvRiaSwD~YN{G#`74nvueNN{YLDne1->CZCQOzyuu7ytF}SIy*0|gnHE8x7qr& zOBGi_Ypj$TZdPW@h$^5TCCPS%WFLitq;LTbPoBTE4x_Mxm%6)|xgRA+!N%pVOOzt8 z9Ao2%7dJaUIeVmi0O>buzrY5imjCRAvP*~HXI2JlSy7bG8zZU`q^;B>64aTdbKehE zhOoZ+95!M=U~D34X+X{{9S(|Lsr*sek~iIp{8{m>xDCJl#OM$W#Y#DZ6mVExsd@}& zzq#N=5Jgd^@e;$1)`lG8BR~XEXUr&yTfok%WaBmo7wS1UW~ba;ix%k_tC|Rxqr5r! zsV<=@ceod3MUUZpda z`G}HWu28G;Y2Nt7586%IeX|y)AX+Z+vfU!<0{mN;rfWdj=GU_w;RbpN$ zYeGp>2_+SqXM%9YV|rmFpI&B`MO{46;@uTdkH0WXQsn25B8B6t{VMi9@Lei&?$ZpI z-NplHU~HS?T-;kvEZqr8Fv|O(Dhd}lbB1jP6r}cg`>#+V1;{P_jA#vkQrCkT4Ii2r z!`}D|SIK7=E(K0}7dY)F_3Ui&xuu+W@ZR;!vI}H{S9#KPCfm4y-GNYU5>t6&f#MIH!>BH<~?u~nJl&hF9)@%RuCAqL`yO5S$T!aF3 z)f>3B)b*HgnB^_?wQNn!JUA`%-Wh)>V8;%9>D<|svUolvS@+71sXq0#Ny!DTUhsvG zqWIJiXD`n&EIJ{&i!=Q>5C=R_Ai)tCenfDq|{v!bFT;b?0}N?d{;?qt_`Ftlzy!O(k1n+wB=Wo!b`%7Vcq49$K*%J*SRsHH?1*h>Np+rOp6xqClfc79?P}3$Hy+5bwHv$gV$fo{%7i8yfVN7JF zE|MwdLQa7yA9!J&U;dMikv)TVlf1Q*oh{;>oMY;Is>plpLqaegtLrsZZoYP-&Mrp! 
zaVMCJBlq5Q_DCcg^Nn21-9QK1nW7=LUZP7GHsXA0W~%;WW~d`~nsG)2HhFgQ1b(MB zyLk1ui_Z{IWJw&5JX?0uCqAA9+xeM0*{elW5qV;z%sauD3Z^H_K~oyquDP2%lChwt zSi=M9;&b_j-l8&hHWpjf+zEiAY0L?UyL)ar0UPRBhirw@Aq+Y#v{A|e7cGf!wwXA` zmCMqH7oq53!xV}GJwR;$2^U|ZrO+x-gIN^gc~4*hYah?E6x8q?rfalwm4K-#H0)Y5 z>le;Pnepq*aDh`2`IBGLj0BSp-L_h$Bmb6KRrti?#6)$lm3dKGJ7T37WerF|4?DNl z6HR5}`byT4%|a|z=;2-NQHgob9A1!AICU$5a1aZYIqFx*Q1EjMe9rehw(W4Kmcb1` zgBRtM_Fo8b4ZdjH>A=M2%p{PsEQEMqqO#gca>Gwgol2fWNPdXD?un@O7AH9t+`2vmQ-?#yn&U-3Wss}RR)phCW9nC#&AmMz>CQY=h zcR}UU!t1MX8B+=QZM@3bR7>nALQ223PWx|>WHSNd6105LjdY(lV5h0&<#`H2Xp^4K z>3wpKi0MwFCySTmEWcYnO{TD;b-rv(hfwba|w*U)eANs)wq=N|)X|zSm?lV%emJkvlX8 zH0nB7!$kQ4#NYJqY8%Embrn*WCTpxuFwwcr_74mrpciU|k!R}qrQ0O8xwDE1zw;g_ z+khU~D2(F?<#&h6j!cKBv!t7XW(PaP#G%~Dcw?xa zu$(?RV~N@69p3S;Q(a@)A%RC9t3;bFH+#xFW|M9tLCq#}Tr0#MsJWf}rwAi~bTf<4 zlDKr6{N%SxS%!R|kyG=|@k_RbsIkP^AJA z+uPb+X-?HL@Wh@Y^E%XaxL?p8`N_7p>%-D&v$folv_8hbO2~Zk^hZ=B<>al3T9=fJ z??Gq_N2FH>NJI15fuOlF*UmT%^U%VIjt`)5g$0gi)`XLmkI~ zO$x*}zv?+E1A(QHlda-K!>=xAaF=qG_bnY*ktG}MBL|P&b~-Atn?GAeKC!|C^Xp$d z+Cp>Z1-vhmUfaoVaJOnPR;(1WCP5FE#Fi+#ss+8C2vCEjkBM}~`e)@%5P?XRtUkzo zYb}qH)@xQ;cU+W`jR8t}HRA|QzZ?H#U=9F)_u7Ak?*C62_&>t$zxIDi2L4~&{p<99 zKL5x2|Kl?7o6Ub^;E+N@c`1~D_r#Fi6n_E!40r}x66mr>A@%IlKTE?PzkW_i+^PKM z4kz|;SlYK}4)GG?(NUYvvj?MVf3HU=U?@~4He`am#ic5YTC|}g#<}R{B5}8DzY_Yg z{Tc%EDhr!dTPZbyz#Q=SYy~3Nx!{$wlP4*!dNR1-4Qxhpn+FShcrMcr5r3p5(HN5& zTnL;jd$j5Ktc%c3KI09$%%a=GUA}n$>xIF;RM$9ofUT~Ucw6dB8L<#*mPnW5$L%6Q z-g~XLmji_a!MniVaCHskc0-2$_n5T)T*%7s=H0I@cOBwG2elkxJp0?$k<62u-B@HE z-`!0px*%cBIrdvyXpnjm#69#4n_9(Ovu-B*B@$NC0V_QOP${7fkc9sw%dkH${^i7^ z5}dKzNkDe@pY{&AT;6KoodiZ$81+v$--{E2SJk17c_2%>?hD|^An#S4-@SsdLM}AM zvrAO5BWB(E%X8gSF@W0FkG|*Nw4HEU0Dp?Hu1JhUgR*)o_OWG~*QjS2&I8*$60zaL zt1ZLYv6Bw(k@|%s^>@-GesXN4XurxYm9p|ZL(A7{H^lpY5Awes4sjmN*F#11SpE9f z@0p7Z6Z!mC&o#~&y&wAtv#&3PDfy{{?q!2;UNUlaMNNHFPPOoF6bn)*ljb=(Bzfx`G7>^oJZr6^>?^BTSqP%Q&J-#{bFn{7X*oc* zkkB^3-$RcAVY?wa{|11%T_A=1Jna85S~igkCR=uzh2N5=iAOyz?M#C-ELA{y3#f~z z2i=aGBYC~v2+sg{iNv;oN7oF&A_Tf;tKu?pqp^aU!T z(DWwbI^IB_9r~gH79dxhb!3+fLt0G%i9G8)=K$jIty$^H8{O30km!AI@Vn|)Lk`-l z4|Q=DbOFjoLw>b&7@tS4gXB3LJZYp%GQh2Z4sR<&!TjVW9(#k$-w}LnFgEW$IxpjY zb>7SJelWmLYe)EHi@EDGqYbnNob-{P_Fo9#4b$?ywI1DQps35wP=~}rdE(~7vPQs~ z;o$oHb(U_qPWkdMbThi35a73bJ`@Vm@sv3%NYy%#II41J=eE41de(`hhk;4Cqp6an z=4Ux{D-=ykexS5ZCJs8fO-i&Cfz9k)543SA^^<6?^t{r zT4_3!d$Z`JzabGORY*f*)WIu_r6>fj;>A*tMbO$%Pz=2^zZ=jvISnBiX%}-Y1#Ok% z7eE>oEk&+bUqueaw(()fzH{OqBx&p%pYW@HEoSS8nT#S4itL-+Eg~&LxWpePM=N0K-e$vT19}y*t_G#YVOsxL$hhMrWa>c@>e>!VYI4|C*ppW z*%3r`dOychRq1qt!qkRuZZNlxvz8Ak`5(<2DHpQj)-@vr>uBN`t+y15TRH#_(90B~ zD+?mPk@#k~u4rL)<$?S0pbd*-?t05o@=r}y>424O6^hl>f!0| zAI8RYPu4gm9%c+@>VAq@mgeXTLPxL3qGJ0G&x<{+_pF?qa>#HPqt#foxOwH#2kR%^ z4~G*GhqIlW5CQEB7>WDWA|bG%7XsFLuE-H_6!f;6;cwm#dLGLa~2{k&T_?xWkwCT>T^1q3j^tB`%yA(9C zOjrZyERRu`_xl2?e2w_HAJej2VAz=R?WW>*doGyvqI7Fx1`S~!Y@lRxgube}qT>`+ zCG5fVB0TbHw{{;o4D1Z!(jT5IU_wkbWuE{(ct+8Z5toW}q2l(&RKy}FUR)95en&$W z)?v%X;LBIKuqrXZw9QfyQ?<4LWOfnhQZ_XcJscY{Y4(J~jIzR8l?f$iTaQXtL)FT> zQbOm92-tOV8+2y3o{&E>ebTZra=Zo=k5awgAwVOj`K&~Dt_;)A@5odrtmQA3EzJ<4z?C8SC)VJAI9{)IupPcAG5ofB`G}og?@|&xCFv3)o=3 z$x++cB7?V?QyjebbyMjSf)r&yf6p-?-=Y`%CD8;Qk-)t{IRN&MC~`Nl>^hBTGjby% zL8;TGDpjRuSGBO`Sb715Vlbk-um0omeoo| zu{wS7>Ug0NHr)zHwW^C2L69xXSw zQ160jqTQL21IxsZ^pOh4#bw~i|Lo|sZZ-1UB%W9Dv1C1PG}Xr0Zkw4By4`e3!Jiqp z_W%894lQE{2m~8+_snI6pOHHfx*UWD*}+V)tUs4L^Q!EG>@A%{0>$(Z?blJyN&@ho zfM@=6+eh~Tno2v)p(i=^xymSIN+T@;{qe!IP-txhh1=ks#&4-%&&=Lt;v!-I-1n6~ zYeP3sX4=<=dcr6Te({m*CQ)_i?EK0GG12F6OrQG&b{&GyI?PGVhwN5@VU;UNfi-}N zmu)mTUO~kzqUVA3`;W^zLerk6EdsxiA@FTvDZ6iCUEo~?T5R}a?jD}=H6tFGOL$Ou 
zKYQ(jxx!BSMl9L{S1Rpp^6WfEiJCu+nU-j%NPOxFKR%AB;G07Ry{zfiYF7P~|o0s+x8(4g{HJRmBO3l`hNhtmtQ7(3i37i zRWOzY8CY_4RlQpWRK4%x?o}~;lq?TB(S9S@A;UdI%X@DqsisO}GO}GHOemfOQ<^Gg zugJ=%aKt2u#Qju2kU*g>a|HG%vOvK>A1`gG5b*AjVOb&#MsbZ6zW(Ji%%`)Hcn_~t z(ZgN&&$O5VGQ{(wGE>Di@>9l0Q`K9=QVha^u1 zQ{2UEqJY>&*brmnoeD!vnre0o=8Gqm4^7ZtjE0cBLhW zls6dM9DV;~yF!4V`jk>B5E{snqla-l6*6jl@|(-xW`!AKb2x@OP;BquJa3;tIjb2L zL}YWUw_@d^AU&aZv0mZMt(p}NTX3Q=Y&;_tnK`Jt4kYeV%N7ypVBYete~_HjukSxS zGL!-gj2FAg#+&9(L*g@oNfeIa6o`SU?cmrU>zd%C0b8()%yOZ$%JOcSRQ`5G5 z3D)U6t#>>)stZGuSwvz5f8?I-z$iZ^2Md@tMBR+LXBhJVzpk|jzoZ*Z@Q+G$bhEt;cY-NJjb2!GekjO^v0hF!KGJlzd( zT(1w7;7#&~4&l^){{Sb?A=UwTtlrg4gBFU^(X~)U;wd$I4n7}d-n)H5w`pH%XAbM` zFamw?j$sFprREhcCaS&m0a#wGF(llt5~Y^@%0zGTl}7TIm!qpr%s)HxRlX<2Qq|l{ zD{bN0P=XMImEu}?mEcU(a5#lHt)5#^+8EU}{+Y1fdB?IWiT4ZaQuJMSL>K@-pXu^! z9P*W1>QZ~=K^H~wIf0o;k46TW7?Vw$@!0fFnfrCnsro>*YBPM5fd17Exq2oEF90E% zAh?^OW+^MbVsW?eWHHeSAr_9Z2YGkNU;-((mX$V*`V^@z#D*knb+vGC!L_t|FzRnR zDU=*tLmIfy+@EB5X69dd{iFu5>f6oj+Rg3%r}FauFO~PCz97=iRjtF}TaHXL0jFN< z*J#F|#1{m0(drjR!YQY_UW(s#xd`0)aDQ>opYh|GIg3!ZSz$8Uyx<{8qoPSxC#wSIh_%tYnw6A!WCMN-IOmXfiG+Tm^rT1MtqXWSfTCi~dj zWb@#ip&wH$2PkM)@9JbxaPU-R^~m92oKt#M#Z7MyP7w z@vfj1b}JarfqV7jC5_MQPy3-uIlNp@!HA$v${MP8lv$CC{7@QZGsN z_{m8V=7wC%6|P&kK&A$lWU34A%`+>($f@@azxcS0*>f#OUC&1=;O1;bqz4WTjMR52 zIo??2Qbs(sREtjYmm{&>FPr{tB=*WjvXqk*SGsgs5wKLkEZYoijY0GSaQRcW1L#4& zGJ_yxv!&9!`Q8)G8vI8!ydq!|#1WYwiWDLG{XWk9c<(y9TDQm-Inj~0;q<)XZ`8ApfGKBo!||SsqYp;Nx%& zj3!T-&lQFYwrUpK?q8vb!Y3;A?U7uaJ&-`zT^AE$3*@s|_b-oxEbz-r^EQoe&eyx= zk$(EspFbVXh)_{sIjo#ZjB|T&tG|k$($2vM9WqwQi2R*jB59W+dJSi&q97|}?*Q$` z%Z;Fd>&kxNnj9P#T?rEPQUd%%6bUcjQ82tZrA0$bmS0g^~<)|yW0z>MXsAh)a z}zbM}&#bEg<~cv92ayCel|h>YImQ*rzd4W&mm9)#=6uo%+5 zs#oE~r|3|f?9EN7-DAf3ULM9K83(HT&>O)TdN!54eXb-77hi}KFx^c7#`r`6hjPR9 z`NO$iaYzAg^kByxDoh?oz--4mtO*C1Q~q~uv!RQ=f@i&YA5Dy?D#~L&?mU9nWmMr} zx5{U%lAmJ>i2 zcB8*gc_nw6rUdIpbQVBjS%3)EgtR1XCEluI9@Z_4T*bcJOwCFC8$maTf>ACAXNr%q z_)jXm#P;Tu3K8vbo(DjgsG^VM#A;7RLJh?3SEAM_q3#9_XI4CwSG}vnn%hXva8q6I zmGdKgYWZ-4e~;=Im}poV;TUChu>2PCU6faBiIUV@vlDjFI>j z)2=~YZtr_6Nrm_)yFbSVad`%UFHaC5ZklZiN<7b@=E}a?lHm`T`l$rJ+h40phtDC$ zII(=h;p~vojOxOVbvD%0CQZU?SCVqh?T$W5HLaZwVQFn|$$deNg=DFF!s2E^3me zP05Y2nE4>C_Kntkk4fJ(GG(kFYw#V9ox;Fn42^HHz!x5j5O`a zz`LR*#)^4?rPYV`WgTcuARf4;zlvWmp62e^T=6ZM{BZ#=Tb*1o^R2u4=kdoT{#e(5V|tCvF~a0O=>%gJI(3lFG9yIYF9hAePTP8@3`lN=2qSE1X2ILQZv zRqxHR`IHDg7-5lMT^6H6DIX2Ci_eItVS${KZz0F35H~-$#5i?K2n3s{@vvMmN^V5L zY&Qm^2Rjag)7&PgX6{W+1lcp@tlBAQbelRGZ_jXvZ%{xdYTEd|yIl3Z6vTFeqmP$- z2(lE&Iez}$-jAqqYn#Q;w_c#duo7?cypG~ESo^WKYLN|uidDJn;lRx8%w>tkrtZsAktohbQgzpE?@;OD7D?-Grvne108#=8m<&@PNn*QWK+`&Mw;-*D8wi#1@CXvD z94<^M9p~gU@xHAgxxh@teaMa}21O@=n53fjKVjZnh>TWqtMiS@^`0;j6lc;_%MIV+ zNV1bQc&H_ll$K`vlF~u=_+}Ah@uv+F*=u`+h$=vsqu@b4d@wBt zeI6GOL8+yY&tI@sjxvuVR{nk-ZoZ9zoS_l!U7Pd!wm*^vx0gA$r}=i~H|B_dw%w3R zPH!ZTN08PgDpo|ox$R60dwTM9qLmM2nI2=^52%eb_$&RzcM+2ds?Ww~+=<2pfO##1 zHh2dGzfkn-qvv_GftQH|Db;*8MnakuLn1$PbNmL4JejMV8`r+Z6RgbSrs=Z0=v>Sf zgWDJsipPo#Z$5`rZ=~0WsB$}&nC|>k_fqouPr23W+Rhf&9UzI^5N`pMe_fWi_t)C6 zVL9Z-Nxe3hHD_0b1>SP{=Lt=e65juAXAuyfySc4YPulZYWt*=_`)Jq$27sukCn>cS>C2u#K7V9s(PY2G{F2S4 zH1x$o?H!0}z#jhk&9=*C51X{k*5PE7@)Aa{7W}KN2<0(QID0c~>c*@ggc-U|$T0#C zKa1%f>OwIHRk6^Te+}#e#+!o8Y%X9@uo%@(c?0}*s2sy}6sXs4JK*3X#oTSa<;5Lh zZ#IJ3tLcL;d;@we7-)^yu}{j~u8HI4Z+{Cct(7wcsSlN?0)* zjtMqLb&fi2cpP@`=a7G-Te=0p+#p+v@M4HT5IX@Qw}QpMMCIyUO1C#RR(YPztaXp% z)S*Xh^R@c|t8G;_03b>w9W1n$axic<7}0sU-p2DetrOkv>& zGh53_dti!$tnYO5c#dI1A1cepY6^jjz8y^+(J~@GiMZC!HIZX2s#=LY&d7gDBW;dt z-qpDMqL_3Aj1M0w5d@mOi`c67&R$|KdU6q zFSpyVN$5@EO3jtRi@N7DJr?@@<>%*U`2UOf0A}gyp>oim87% 
zHXFZ5xuXJXq@Fu*B4W9~|CfTo&hyx$eL4#%VfK4p^x3q=WUeUBPqWo)yi6KkV)g5`?%ns+4bzCys;t5a_VPeoxj4Tu5_A&(d9`AERm+2R-5AbW6M5lPcpp1 zp<}3fqhBCqyGvpBM0pNl?sAWkA=_h z{dVt7ha(#174rW+XaR+-CrsOf&xJTkaaase$UUVAuid_MBhaOKP-dBr!Or}nTzhe! zUC+gxHtr0azz4e6kNa(1oYQsS6k0@BmFANsoBjzZ&zWx2nFyDLCk{Ufh4E-eG-$s) zkM>tj9TvrI+Ax#ZUh?#i<6EIi zL>Wxr96g3b)@p`bjy^FSk0)caFc>$R0DXH?ApCDJBAGHZ)c^4WcGpMp{5=H_ZXZ&_ zE!7_2zq7naDk~X>A{Qf2vXUzpv=EI9uf@U~!fH%#G4Sg#VA2`}B-Q+H6E(y=aL{NO zZc++`Bm|2BzA;JCs)3^OBj5gV`WtTx&SMpKwX?$@Iq&w#>V$JNiaU(P-Lu%U>u|@; zv7~ee+|et(nv0nQ)08^=2Ut)jJ&Pfb|>`Dh>m{b2(+4}rs z=n=#$3wgnP_hX*xUqBp)5Y^ZlP@z@s<8%Lh8hSS13js5i3WKM~Vm7}0vtB7~yQE^T z%JwU_{c=V(^9sw)tlE?zy;~WgES}V&>hGBm@#F^u((7yFcJ{a+ibo;+F=p02F(Bbp zc_~4xbGbs}U}9nuVjNa#hTn``ZYcSy7Ws8AF+A8dZU~#AU}~zyJk`u%s`-JW~cI9XLmEK;mh&JOMKdFO!6FD*i_sN z%UC`IWcC{ms{1ZPt)h8-%m1_#i>c(+HFT^ILjif2OoVoqmQ_7lna{ftKDG8?j1|OH zJq7=bT}ugqpIdMPMRaEyV(KQ5&6rp>YH&udtsrChg|z-L_Ig~xdRejkQSZ8Q1U?IR z`HLlzy#OL63;~bnp#98OlV9A;aY)P46z7)PiOmkKhriTP^x9HfSu|9cK^A0GP__E6#M~e57GOuKs)895cpGv<%oa;q7 zh>P5x5lh_}vyj^}_lWCSUWOk7zxS84!MEryh zS&=z-IfQ~Vb^}br?)^>)sYybZZcyxvjz+P@APy9GY(lhl0t-=mh41S)b6oF*V)D2d zW-yFEO$!|p`j)1y;xoABDd^aYzd_B>^j;JHUmR}UVKBn(Syhh>R?42=Fu>FEc(5^U&KU|NQj^oyJu{^Z}yfhW;vazV_t#4!N zPQFkP$tQ+nY2?b^MM%7)`n#dPL=Li0)OIm0WPGKL5U=UNzKo`Xi;@ooz0#jV58cw3 z)lXRd{wNsOy+H#|kCVDh6|iMv1N5%bpHu8Cm6wvOc?ZG_oG zGna5aBO%WXEdlOm6yfoqN9%bF;hhW6q_RLN?@!7t^O%YxOoo_Ny{KIZ$8KCgS*2qG zPE)RW%DhdDpDAM0s&HrfSNbVxi!?G10e(3Yst%haf`n+=KdV>b&DHFm)$3oqHp073 zOFp}n{@RDsne0mSB6LjwVMH47oo~!tlHSqeZd~C~_1S~o7!mcaODJvAh$&=+g z2DK~W$+)^^$YPQY$?ydK#0Ta)&Bz93UBe?D!t0fL!&!c+S{9oYw{sH}|F;ZKC|wri z1{t5P0l2J_*oE1_bir8g#__j ztkaQGT+suNEe)=4o7_IkR_~k{fqIs9kjIM<82*xbE*aQ~@D&(Z?~i^B#qv|3TyEaE~ay~df zjXn6ZTw_b&Oo9BT$tR^*zokfug|)N~KujeF<2IeNZNi4g8wH|- zIKXz9P~ZUpfI3fK@^6&=I^Dq0i!OVpCf6*i`t){CiC6okYyp-&ZU$56>DTYP zHax3ehAEsXCSTl`bSmLg&KeQ8Z{q7-8>RIqRR~e(Djr5VDIU3GL)Y%no;f$i^ z4Zu6(EmW`4PKj7{y8=;PARv?9FOY7tIgKUb^uXVf0Yy`!Ea7#L{Og2#qtIK&RDSQM z5K`vk6y+bQjqhklu=Q>Iis0k!A9-f6%uJD z0&)xJ_S7U*z=~Jv9J*#c^K3|P40UG)9H-xsBQTE@Uk$?ZjRv7&;Dw*;+7lB*M|5Lf z=`T%Yoe2f7DpV7ax<~tsIMRzX(KfQhZI+h9V}H0kndiS)Q)PMl;P2 zy5YZdhKox~w&EXLetaMyH~_HwQ7)d;Er$iS#YW3+P01$Z!e?s*&u-5Z$H5swVB)}K z98V90N2UH;pks*v*xI|BK=z(aR{DaOWN-^jI?|}%hA;j_HEdD>aFkM4#E6%3Aq`H1b#%8o8s2Q=p~acP`I(Fc~N zI{-y#chO85x}jb2(3i9DhFSDuX9PE1M2By=pkPCbDIQz5-aNeg3uQt3w*3d#AY#GW z_n;$r@+@mfeih#MYIf=(WS%#x`(ME%pw#@RDC#kc@Jw|PUb5Yk%BJjgf2wyPHUk~azd5$aBP~-Sm&f=JAj^)d7BvLe%JT9S^W;966 z!|{sl%ejKSJuW=9bIRsQh2XK=kJ%RYI-Cab*-izYDqFXYV{cyrw23L8k?Fap0Zv;e zn-<{x_KgJCtrqP?L z;lio>`b^muR)?r+Cq-o6_yW)Kbldu&wzFst2y}{b3GE0WP9(wh8uQXAKgjBbL%Bu) z%wYq?s}L=wQI)@RW$r*6;IHCvSBp?Y;j_q8M6s7-mBhQRQTj@BH|LUImRxq!VP3w4 z#LMZ&Y3Z&zv#RPMJx;mom$}WGNMY8+s!r5OSQ&4!CKd6%4E=9SaE;e_aIuKo5tweH zvlBYprI-a<#N0(d((?It_z=xhM|=tYGUR;*{OFFhI|sF@lo8mc>^Z!D(Pg;7a!1zQ zh65JhZ3lXpQBpfe@Ne<7;|8a0GqN3l7*RDpQW!-`Q}G3|sByMTYQTOC?ai$4?~el| zmeh5%(XMI{(Lks2FK!bW+7N-0V}i5IdJs7@Ah%x;W&g*@WHRF?$`E3ZAE4tiH6Y6p!{aKC<-GDP{4EU8Vd4qC&`mNI%z z%eA&^Dbrx}OdV`TopFY9ve_?^gi0&goNaRoH<5J?-vl6#T2lZ;{RUh;Vx?q4iqQ47 zw%wqyAG_=UT^Fg#DOvhwG#tY052SCweSOA;4mvdyD+44Amygb`1@L#V0C&j1K}Ngq zuMAK=4M-t2@-_+p6J^!sIn!PP)IA0ui&K*}@M=LXa0eG!a}>W}B3FA7W&F@A%YBVr zNP8porA^-UQx|)8qB9$iZSO|lA8|kP#FT|s5Vi;G>mp~y?zg{a8zTuxkrug>oVBT+ zK~qzMO~3}R2uD*^+St5hYcKbH3?sn%W9<{ER7I&0(LCr|$t3O`8--o~QEd5BKXW%3 zJUfmREB>UbcE|=f2DYm36W}47?TSM!rhFDD@--6cn|(PlV#W`P3%rQ9G<|=T%InnT z3sEe^`8-5HGtZ&X-jTN_i{oo{SVb!T1~a`$BKCj?RSP%f%Ojk%U0M&A6#shje={yrgBB$n_ePza@=?3C`p`l54B&2X32)MA|jc z`ESeNaJ8+YtA%vRgF^`mp{!#pkS0A@+|LmqlOyPBolW#|_%RUKW;b0P;8%NMCwI&N 
zKfl}9#I_~#!z1LNOj)-j@(cwNZ+P);6EH-cW@N*vo$z%sx2 zK&ZMQ)y0kz&X#p$=zE-z-k%gav42!=EWVnsy^qeHe^l=Ve8-ywf}>j~&>z!j-1T{V z-?LMoD)IGe<`0(pAc?nBD^=?rl&@`wgj9$Nez z>4}sYQ!mZS;5Y8#H5+DK+X(IH_HYmTVHqTb=xAxWdrD~cuvjvpkh%=-MEGjl#A-z;3w~rnPLu!87?(wi+;QoGH17N$R z-rFg{?>i4-e8goY1G3TW*V~u zKk_={eF(BmE9Ss=uHrc3yTGjTMW~n99Bf;&g+|AoWpKKjj@4m5i}EHy{UHqhjZ3~f zXVtyfg$1*|eH?%HOK=<(9BX9X^o}_#P)@Htuu8-JIidwSrC3P`Z4Xo3-EmZy<4Y;4 z!@9wOV4RP*dE;PWe4$=4%fxWzjMf;Xu_@Yz=-H@1p(let#+IlI6`gaOf+TlNl82c5 z%g_(>K(hfRVmz`fNO3F1(KRcH=FQh6^U0;zN(UP3ZDOk2%3mBlk~2)!yXC})MD1%a zi*$FyH`NhZkRKR{Y*I)vnFRz{5xJ}VG+VMu;i2t*=;Sm_??O)T4b@sAgY3~sWMHm-+x#)bEwlhmIvFSvEHeop+ggX0xG z;QFs#Xg*Zkxc47Mp3(m@5PvH2<6-cE(c<`Pi2>@IKSJ`piUt$Xp6ZN{N`( zFb((2z6!RkH-lfWKY)>vYG(ONneSrORKTTfZMIr&x^y`Rf!!0dOqYx!tg33QeX+(g4SteJu z_hOW?NT2i~?C*FS5GP7VQqc}vJxYKRiSzCzHAa43K8>^3v^z||kTeqGnNq z4{!U#cvX7b+L#E95H$ep*bm{$3(poa?p+a;?a5&9Wc$iO5eXwz+3H#H@Wu^;S%JN! zrl+lhzeVk~SW^1%jwf!-zgmJ%&o}uQyraYTrKEO5qT^KZA7EGZXferASvb$bv>Etz zoj*R+K&+RLe9OPi!x`0^cIs!|YG7PZ0z`sHl4Y3beq$5BbTR&6iOUcDZnt6~iO$<5 zOmTqFV-q$r`n7lDZkuV3_)BXLG%FQY#d)#Oh&rM`?q&^#>B6|3Lgw`)E9O2TMFhGl zjFW;X>C9VRWVIin2GGO|H@^KX^w8H>e)Y20Gu=O8>DG{H6YbPl67k8+Ff64(H<}s8 zJEWsI+Ga_vpfF3mic4n<*clQa0GF}g$&O*?Hb<&4?zO7S(aE0o%EXAdLBC`}Zbg{# zKgIBmf=O;0@h8;lxzELsLp4>YrdFNWy{6n3qsJq_X0OnLP#zn3%o-GR6HVng5+%?E zYYB!q0lbSeaR`TO7T9C3{Q}YUuZv~*ssSYCYJmNt0nl3i2AZo|41y_8iO+2d0mLLs zi+I@E*uu?z6hAdCqZqx~`E5m$X+13186<`WPjK(X-@7$Xn%OWd{PL9Y#sK7}lrV$E zaj7JUB!uRHUdmK6IFOhcgF^vpY2v}ZMcr>DsUMqn`h}Hc4Pr%m=Y=+rEt^fRl$0^$ z_DhqpfIpFpgNuw=hGRO&x?L0ljOI_+MPNIe(cHf)eg6T1IH9^P2zHpxEec*+#T8%`WW`Q+CLT4C-$Ky`rkya`eMn`oCh0w zz)>FLMz>pTG^@*E*PRFtw-nc1xV)I-w2HFKle6DYp2W1S(FBn4$N{P$7G!0&cTu#d zarP`tiaZCCZfc^igh-S~a7luYEAu{VO$4D!g|XB~+`zVuQ_&LuS`~q@{F+PEs7(c^ z$-dDNMEllJeTuK=auUVPz9m{6S~q_dpa9Nq8SC`e;0(J0tt)HzQS_NIpS|kA>2~b zo8VIU{)PFOoU>@PnvPo*fd=&COxP;xAE z9%e~5bCrBNP%mhIN(z2gXLiYO8$Z|&qQ68 zvUIozYNopQ%vGJzuPGZr;5XWpUs9;yUV7J&axQtookYF&YTU-Swi8u3$qb0B-$uKs1K!H!20b05!G+JuEP#bW%HgzVH zEVAU>Z1mS%XR>S0L9nU$!tzyN&oT?N`*Bru1H@Gc>*^_WEelW=*A>Qp-8(3fnY$S+ zA8p=(!tK6vD?z)!M=oj!7(XBZ_i98?#k($cT$FU-usjNJu_)~X4Q}QC9ZxhHcq$`p zbr3X#akcyFi&Vx&QvEC?cV&#t-ysb#y_K%cho&JhYy* zm$lW?D|HX6ty5X~sxB^&jOt1zc|$KYFTP{x;44#0IEHt?@i@yP@+o*K&DqCAoTOL! 
zPr%U}>8Hcbf77Nz)~PuSwTI`$q0!9;Yb`w`SLHZ|<%h3_%r=7nGwI1IMQdl*XC&$n zsVzFDoDFGVD6LbN=hA$!b|;x)PQ15x0C!Zivm)5vBpbvb{TuPJNS&f7%u%(zLv0T~ z=lSOcvmX>p$Of&Kb{n0aA4vNl=EZ;fQ$uwXmcy_{(-F~pVYBfGBn$B$V!HaNb<7(o z^_E*?>%kcRLI>x+Hn&ktaAlI_HD%JB%&>7iRJ!;o)HUQ1p@M5nlr*5_4ooP0yO+%~ zxCLu33piVSGPFwY#-?5(AWzRF$6x!y;OFkX+QA9QO}C(x^&8@(d2)%`f??iQ!NtmR zmG>_|Y7T{8sn#VLXaOdGu#>>u=Zp6O=-0)o&DoWRb*73Z@79;8*ll<|QCTogI3vCeW9G5R8`hI73cmoOM8 z)~=LQE%*f*Kxp$BGU$VLIv*<|%+$18KNTVT~5g+5y?Kzm$|-%P4U<_|0I6E1-uf zn=(O)OBBi4yv!v6$!01P^e(XZXB@QlX8{@ZQ2<>ds#ML8<6V@e=T=oNugga;R<3Em%QuaxJAsFDx zgD7rsb1`t5XJQ<>M3Sk5uX6(mOu?JUci7?u(+umyo|w!jhMO4pC2ri?iPW8hiV@sS_rY`PQ35r0!jwFJqJ4V#2EK&Wvgev==kD08B;j{CKREUOO7fMf`hBBW`-0t$ zISFG)bL#=vpk+le=US|wz_b+!w~+sMfc<|R!E+R3ORTb^FKj1v^Lzuk1}iy~8@i7Z z$h`cJgrTz>e^5NybP*WUjRIusZ+)`hi#Xjl))zwkK!&B(W`q3U_D^wH7M%tf4b7cq zz3=v(a?alUzopd#18Wx(8RG%3N-rf-OyGu{aC>_`0!U}e0X3F9!r^nywadnAYJs9b z#hr$`j!OI8eVU4TJNt1p6D(g`W4wrufwh~|$MsD_!T{l%-vxz-OB`#(EZ3|sMxBG( zHf$h>+F~9M@o7G|%x^?3{&`OX?3APA-+g+|`}y(P{HZ-vL+2(3`KWT@ff!1^A#ta8 z90}XeK6kdN*8bVx%l~&E>+#xwlIaR60oFI5vc!`3c@t78s@N2(SZ36rXAaOULaqXf zu4lf1{k~QDaiRF4R{TNq2Bg4KXC6(l^gGJ5O7&vDzrEf8+M*|90=k?cUGZ zUgh@c|9Er@eY>rFwZ&Fl|Br*gG2}Ih3bOqy_D@gCZ@iI2Zyo!2cOJUlDZ+PpKUt>m6;x!+Eqm zH_#wq|4*#14n&U7X@p{u{mA98z{~R2n8R!VsxSMWuFvTcILu6YvLb7PhSlm~ocUTt za+=P;B6p+s_4krKhC5FuO5+t9(3JS1!UMcpx5KB>p)ex62AP2*iV^L9ZR^YaMxc+L zV^DV6FXBFPl-fMWh>{p^kfC)0_PSYPaW0dAXZ~m_KJArPlr{QQJv11ny2z{AhLk+n z=Yit>>%r$#`jiOcwk9^K`O4Qf<-9K7OZ!ysCuO()C@QqY7XxzXXieK&>Dp+NFXSNx zXFI{!?eyUOPy4jAhs)(74S^OnsS-q!%~2i;F6UwlLmUE~KNMLiyM`=Z+sX&?dSQm{aQw1B})MK3;pbOg~3!|HnL zJ7`2@GTJ)ZVS|Sl2$UH~k=|U3GCC9d3*a4c9H?+%oEdIHWWUh=KLcW^q&$et5o9#L z_*4xAhl%iA7&(-20(F6t>R~oMYQVTurU_e@oD0>utP(=RD3^+Aft=O6gqrpecWeD% zfeA~J_isFmL#M$W-d=_&43Yq4|vJ?eF?AUskZbA ze<6hELKd-%lU^i9qIXhskirQ@%W+s(mWTNpYI>DfrBv`od#NiI&=!_yJr^*``4^O> zTD+`!AFh~MWl9nqz`{bAoR$z_(WiMD!7EUk`Y~Ir0uZS|x&z zR=rewu5H#;IC}A5&iLez^hL>*;FVemZ)2JLbn~XD`GWc&BqIB8S}u%rx5<7zpH-IO zb3z7yKjZz-(4SvmfY^&B`qqyV7Mn3m@w+%0Kp-%d%10H2znUsf7^87*r7yhA0(AM| zmoS#df8LP~r$99tTkx?XOba;i@RPiuRt}Q`1N-2_|3&2aJrRQD@=t;5nt}|-NjMR& zM07>q>}^PUvP6aqG1Qfxb8VZekq)p7Y&UBAQC0_<8;L!06Wt@Y_n*e0va}*w?JkoH zqRf=1i}(qUc=tIpJMY{qsLN;A(gCFmpGCHSSo%J3mM=na}i(5`kInc3Y$5K=7!50EL;WLH|v#@nb0YSNDHc(d)jl1^U z8y*zpm?zdM4^2IbMVGiyu_iYj+_|`y0~=MEW(H$|!B>CkNkIdoSSlrb5Wp#E3Vz#_ zxzKw3oI;jNK^%C=#!PtVC0nJ<8}?0Kp`G}a?KKjvti<-Ct}-^Iv#w;`8%|jkfPnFS zTZivqD<_HznF>I+bFW4MnVX|-JgV=KipDi;*4NZAuaMwe(w{NIKj)Oir$rjpTdksS ziHKaPxP0C#(vNbhmiz-A;qJ7gU6zx)^GOP+ceAa*iWp(fK3=19c}CDS(kRka2b%Y~ z40Gk9yMi48h=4F2yM^_(#+fU#?gi}}x>r{~U+Cl}6BG=#9*eOf<6TJeY(WMn;`ZYN zxFgH(bDO$FNg`tO-W_~-v}Hs_7V`B6#6|@z2ttSlnYstNRp=ZVZHoUUy3>9}nvqa8JPvmyrj=t~ zF5h!+4YHNsQmjp?(vJHBKyt zjT=@>3N5PYGORZ4pJ94LL`MomzN z`8;L1PJr8L2M@?kY2m%nnjQqy!QhdU<-UyV`rZFD5X4WyP6Q2CbK5~ZSb=s=;VT8% zZC^QJ8&mqein6QfI~*Sfzyzm0_C|Mx=|>-ygEWoDo19_g-yxtc&nNP3aU9&|vH?2}mf%O+ov-hBRalN>T0NJQE(Z!<(ZV|4`YkkV# zZf20FSx3K#@mV0kvon8zH7eftma5dq8T4@P1rupZ%}_y_`A##Z2>(dbTubzHvPIwu zO4&4Fvv}Mvx00~crT~_{$83jcpWI2IXt^jwo1Q?vl;RyhGHP%WsGaenGbuByDG5>eS~ z_+<6J<5 zu4~H!7K;mS&O|BW_@0X{?Jw6+ZPu#&N4mesBF#p?-pFs6goaYTLj@N`=?9aD%MQFqdtiq$(d zUGS|trM-L=_wkDin+FQ693td`4j7~PI84?3$2U-+f|~D$+OuWT2+{AlR4gUa^l?<| z7T)qH!D1<@rxvCJ&-EwhFC?Dk1yHrtajaeREEw#g0G}A-#c1zX|9F3I%OcPigCHm^)wqgO`dLtJ^RUFANT+!IL9MVX z9@G9Rjb|E6Uo}wxgc|=Czk-o>^~S66jjU&W$uhel2_XqMM-H9{4R3?bHUh#dcFHn) zMK)smm8nd=%(MY|{;+1zpno)3V|KumI3v{KY^~=FIq#AD!I?IWM3QX#V977D zR`)$m&DMU+gjdkXp*2~X#p{KA_dM$<-aYEegp8U+0`vSi*>Xe2c-9+zk*19FzpF8S zoTqa_Z{rj)y1F+%F=qCpJd0mnBjKgQsCEb^P5)L^+`f>PLOb;5MkL*wcmc$LMDT_~ 
zJpxJqieb|c@;6@-H~RE2#CUVJ5Y0UpQZHSI8Y)v3^bci8x%vT{B#Zq?6#V+cA0hrh zG@!fE_c{W<1E;6{cQoSc_wqb7j^)GB^mDVkTIw}Cx&Xm%a+Hl%q1yfd48ChV4NPmK z8uiPoiDSl|5}E$#IR>Z_(@5OAP#$HNh78VGx2iYLxA#l^~cJPNgcKQy*~F;OeM9$ zTaE%>?{YE)9!Y>cCTP{R)E@?=0+JF%;_4ERzLs-iL8BL6+E)YOE&C`CN}_lM&|Ue8 zYa3<>^b}IbXT$=DP9!FBmIIoNph5Bxm5VV1QG#r2CF|RdxVyxBcs?Srt*ijc9uk_K zy|5YM7F}eqJy6j^gQaq?4N&yBNXVdE5c>AQ7`tH|($HTB>UTa40I++($A`=TPA>Fwj zR$Id!_JsgMuNETtP=2QxeU=*LlIMah5MHMD+?>+zuPJqt8QaHJQ^UAt;(7#wZSLzr|%KE+h6Bi z@&_3w*HL?f{I@=YM1!^J>*D`Rd_mopE!kT|fW(ZZa>c@y6hpRk);(qMGGo!*!)Jvk zxehhunH=_BcYpH#Pvjl$?39|*29D)7tAvt9gR+P$Mp4q~`iB=mvFmu?_FbbEY(>tSFsM*~F3v0LAZU+OX z;2M{H&Q`fJ&4~_w7oJD*%UsDdDbH#Pqpd`lpig<9RG9^D-*oV7)*@mX5}diW#XwN$kxqEC5;}aro_vkQF=T}Lhd%uEpd(wXIVT<$69oP8+`lvOPQm+n78?=*z`f!?xT9gTY;J= zJrE%^YNTW;^=52U*?+@lu$ok3GkjnJrR=Z?oY`q6Y^zcG#jtsOpu?*X)<^EWyU0A5 z)i^tVV09hfHcTgkyT&qASP~N)HeXL1x#fnViMXTXTvWWTCvdFF|Vo=rd-1lm8 z4W-xxFu3y$7=MOyX-whEqIiX_G5+}|@K2cw%yVVAX@MNc6#>TeaXGGpReFUEDmAr9 zxt;FcGwe*2sg2q>wx%F>#u~`_(UZzIsne#|iv$~FgFax0BoA%}b6wlh?iWbu1yTE! zJ|HkzjyItH9+(xbkOt)uEKnLL)6i=e@+ynHAgI}uF^@3 z?>XhvKb;Ij30u;EsH**xlXfm~{tx%gMBe z)TT5(cJuXb<(ID2&5n2!Ov|Q96eiVi3Y=t@6oo>H{Y5-j*YoSpY~v~?7*k3JWG)*{ zt*TYtsNO|5esj>eK9BH=;V=ht3gdsXNr*=jZcPNVb{hN@UAO z?uT#K6EHj>a9Mv85j#jvv}Q)tTW#jKgjOh0I3!^|%|)T9PL+h+z%4Hs?Mc@jFhw65 zH$u9xbSd%G_UcAKTETl=v^BA5TNL8~5NR^$kqXs=9+~v-x9dA7%r686y3W{%brIBT z{>TqUdX{DeP2zcP&|G~|FQJXyU<`~DA$i*Z-Dfxw+s;~NH51c6H}MkHv=7VYTjeJ0 zBZ601sD$YJfu^{-{&{0*BK+(L@X(l70}?} zG!+CGH_A;0yYaiQ;g96B=~F4yI*p$@u7z*9>*{JSwUCloh_)VEAyBJ0NqsW3OP-8f zAb9>ymj}26K8LM#WV~#v!-cuX&W+{gxR9BxNSS$jj`g|ZX4jbbWQ{mIb4`LA12aB+ z?}HkP|5qKaBTe?~_dPc|WwUQ7a9o_8>Z`vSJhp4<$3OPBSif%6U}@ONLv53Dq9jr($53%ZCHC<16gq0ii~6NO^_?8SlB0C}9a|2MK8?MWn}( zwPO2KB*K8g`0B0_SuN#hk`?lndAJxQmM z8VQlSA$!+HW%mFy?;Gw~uRNun-hc#2y|Rx8R?=1p?-5rmW&8a{j8OzLqCkl24a@%h1$NH7l> zecWu)Gnc+)+ol||CrW?8Ugi~7XE54{sX876>D`l#GJGvS0()`RhkzMEr+k~?{$nNV zwkOmSe2dmuS{q!KCeNv_BKFkTvu}u{eTVczNK6^ps#q86f@zp~N`MVd6$f!4suMPv zLmz9~iM4Q*Z7mz7CO1MG{Z`Jsl|HV{qm4CPY98ls4HwxokNrlI|7QA=kV^QG3x>co z_(G%FW)*>D(glk7>1>*gxC0j$u-^~}4WSBQ?;ht~I01(eF`$=A9Z_NbLKEwYoQ9^_ z#3o60m7xWFnl`7h_(hDGO4)5fGACJZ0EAN26T436nG|J&Xsx)EPuLam&!OVYP#fQlVow>u*=>AwboRv0&3m&TR~z z#$o{I{r@Ib+wHg}?Q)tlN=zJRL`j$5hwn^WT6*!8icPCwOMMlKd-$>0`<-Bm#VZp5 zcosdv79WZI&q5I!JvB99e4mnvEDUuHc9bnODxGsN~Jjp9D`X9Qqj&?Pn3Q@nRtPC>2 z7~-gJ>%9MJ)1U0dtuOfljl-4}JF~*cDAGLpCrxucCQj!x(z8LH_%q!sCiJ;Nw2!kE zV7#o8v9FaY0^7cWYqRgH7g>f3c3qft()M(k6Fuy3(;LJWn|Wcbf0P65YSOR5O$nR{ zO#X;HdiSiHAPng80!3ha<5u8I1;wAP zn@XzUr_?9>8>gJHGOX#A^L07ysioiOY#F1VKmbf6w)jJsr%R@P^mL1eqTA3zuwF7* z^r?;0w(Uv~*Ct}9JYL=mGup+!qz1HvRGAn18?=}RwYaVBsbxiT&-()gd|~GGN-RQv zeIP1Gm?o9k*`@+CaB);CJZO38-d{XUe;bhc6^KA852#||?In==(i7NCS0|(S|Aljo z`?%l-CqRa4W)J-CoirhBK5-uc8+Z2rcQ@~G1c^{D!ugn0oW{E78A+n>Q-}^B0?rW6 z_0CQ*&!P+uikj zQcE-46SBSNsHwEwDgI@GnOD2n7rn}YC zhuH=&lnobq3ITfc^CT4nQE!%Nkhf_$@@kXqs0wA#JcS`Hd@~o7uwG2_C=^rNOFt8z zJ3={poHp0?L$45!$CH=Hc33cy1&fjt6WSroE(eNz{g*-ZrVi`kfT!O}P|3)zJZ_g& zyIoXRYcVIIUic7P^lOgZuk9Tn`knm>OVccPO2*r*)z#SIi&3H`_5i$$l6g6CaK-ij zl~nJDtW#zI6IB0lA!UyktJ7pG4hq)|>om!!1{YeY3VQ9Ythh>9adeI=bokkQ{Bu4S zC|qu-3R&T?tm*`+Nq3k*B^2*M(L;9_$)z3fN$2#^WhlRMkkUvtlIlm3U0xio&9!e0 zz;xR6`Jjs!TX8Cq&yOMSXkS4Y%XmzQWNt^XMu>q|mPS#>0c*CdK~mc{ zIEm1~g04JnHL4Whw)vef4dOj*)A|1-e{gGt4}fo0UPJ6PWIWq%UpUUKJIS)lVT&Qe)lMMXf5qE_W@Ek0&w>pgr2i;&bH6sP9aD7D^a~6_=@VRrWpb-$@O&ZI-Ych0OIBs`c<>yKQsOV9 zoj7-}(0dUoFJ4uzKEr?-qp^SWp+fi5)}~V8Ll8_^DwpuA(XSe0t352jK$x!OXKzzMvUP28X5qvlmbPF|K6R(v}M zqGP=qT!Q&0fXbz{zAzm89 zBy^|C^TEeG#wZZKigi~lCxBnE2;lv>=|xhWc3Nr~$qQ2WL1_c;>o#Tz6kMC4r{p^Z%%a>?K1kcJ3)tn7{ 
z_p9nhgDlT3LXVjRXt@i>PU!Z@49#s5eyfIvB@xIEdszfN3X|gcB0Gv6U$Hk+$8gs{ zklyE`$|z4R{xzmrk*Ho^#Aji997WgmX;o%eMXDhU>s7$&C9L039)9AnFqX!A-N&nV z+SY?W@W?#th$6N7;g1d8|3B)G!<=STQjl~Wts0>F+jqh9f2h#okI&AaL^`yEKebi# z;(Xo9uHuhNDxLA@eSTcnXFJPGaCSlk7$Wq ztwph_CR=CLn1L25uBST`%W5<*TZBkOU%5j37FhO!`FS##e;6-2eC&K5ZSNWqtL+e) zeN&FaAbBK7q;wfh^h(ImZrD)6DXWtu2Z#S_cUkvP>dCineF>g+lNxi~=e#5$8IROP zb>FpD`#7O$tF087i;RH!1!KJi+ZSP&qENZlg97K$!yUuPLMXS=j+PMZ3j}uADMYk6 zwe>aD`ZheA=${@cJuZ~7>#ADx^1B^-!tRKRDKZe7pU8@d5SY9^I_<1!x+V-u5(&WC znBu>GH8E{KAk%v%gDjL-IT`ksUcsOt{!&dezU2%-3I_5+7}^QVY6$p^6JJx`-V#yC zyef%2fLz_(_Wdk?4Q`+y%WOPSeRk~?$!=-y^P5+uw4NE6BrG-`?;<9*OO}u%<8A_bYS*B1={ki>(Ab$2LRNPq?1v z?hlx&P3=F1#gYQ)S)iw7`Ob>835n%+PJfA|-z0__9d0mym!F#*-;{RO#3P$crd^JE zVAD676WWUJ^aH7KEy-IlH5_JavzUD52?e8lV=LY3%23rdZ`moI;^7T~8%RaU?1hA3 zT`cQwNTcs{2m1a*uIk0#6A)7}JJLQ)&|5H0%`+#+tY{-b54$LJ zxQ+-J9ayMjS$625sQQSC9UqL+0gIF52t5T`8ILz+kj5OqtY(HPNM;2)n6S&jWN-s+ z4g`QI?k_3$IXlq0!Ei{0z<*s43mYbr{cdDg3bn<+M@StAqnzd-I(T#KmNmRUuy-bU zA&c~p{^XvJ#kw}y6B=TwNDu;FN)1j6p76vRBG}E~y8Z`&*pJ~f{v~&1R&YqB`GMJy zImDhEyFcw}zh{E?K^lEEyA}1sv1QyAM=$7NMLc&xkNd?G)P^7N_@8|;`nlMEME=mR zL3l7B;fGcWwUVtCO!6YQTF(gqL77nK*xw`tkc=$HSRruU(-MdHN*NTjWz7n$eWfQ@ z=6K==Plt1WA={)Ca)@g{Mwo2$s^V>&Uf-diwE(=td9@7l!2*E+Bgt9apl<>Y@prR8 z3+~8{DUnmK3bKT5h*)QYKw0&LQxnwSGvl| zuL{Nn!den5wgzYo4sEitV&jJZd&-4|b$|2Wr1v_xKln=Ah z+QPYzYiwu-fTlw+>ttRB?0A8rXs%^%-WPI?7zv9#9N~QZ2K`$1`nA&e^kyHcL_b!= z-#=FGpG9E(S~dE%L-lKi>ZXA8y5j3`u7vV)v%s9lPSrUQ;*be0Exab-{u}*h?7?oW zxfdo1x}2Zx#gDj45px4dKvZQsM9dhsHFo@l04%M2k+RD+7`Lt+nt!M8_AnTdAC`*m)sG%46e4!&{pt$> zi89WsKNFoo?IOD3?1b^Z7h;v<)~?wQe7!+X62M_euV@u5Uk0I0%2h}1Rjl5RYeg?d zRIWJgai2`zGM*^%43R=EZEhw1f6M3_^Zg`~hZ1?HGK>b2lq#1de9eFFGQ4vlKQ>H2%ZMsqT?qv(2U5HP<%oY{tJfED`R>B(O;!}*zg5-~JC{2w@nYFF zcN^5kFH!+Y;eZDqfc$U+{8@p5ODS7AV%?!&JD-bz7(~CNRdSa;=0I0FUcDM>k_+AI zSNKJ0vake)?atulK|Gbjv3oLX)r|$N#Pkt@gMw97@ic!6GdT<=U<)!It1pgD#Mk|j zt8>c5d9`}l2?ZS@%!WUgD{W!a*3Y@BWd_H{^4E^vQJJ_2p_AFZY+=$TIyyCmwSBzr zMv@nQ46yu)4C!z^kUIvr1D-a^J_h6AllHtys6>ErXAbFnvFE%x1HF2dy?pXE$)qxhk7dCCYh_l?a=`?+ znq0rlS9Z7k0N`7(+M%-rt9W#LN*EYzgocWxo7O0@Td3i=T8cEw^b~e#87|by{xHqXTbo&!r6xP!pL*1%AB7g7D3j?=E-H%1=3AddYfl&67!MazOmWfCK zOnIvO^E%Bs%}Og~;OTaW{1u5o1FN4VvT>5fmN{ZUU-5_PH~}1hP{)Cp8-v!B&EWpU zJ10sfyXx8=rF}Fw3wINHt~@TK_Dd?J@(cG4=|E@K0?d^nZN?bxZVMcOMLD<7WNwr~ z3f>&NxqItI&97G$b#|1=mWwg1o(puKn5W!f0qiD}Lg?qY{vYHt=;Z%@mu=TdGN}^` zb^C$5S{`T&YhjzX750YCYNV_6mOgW0fV87h7ln*8B@SRe7PP`i4^$%|21 zK25JUDQkTnCNaJ8+qvKY^{<#01D=0#k&9f3Ty0`^TNde3OSscn(wI?{l?#2>m<5G= zz=V_Ds*G$IdU|aW@R;u8AhUB~A>1*(2udYz+*s2U1W-7fXm^AgA^xL0kokFKeiQ}3 z{~O@t8WZ&U$FiV-pGkB0@*K!FWCRG*I6VUV%eEBb5WFesrf=o(E^j6V`J0A0CcSd2 z&fdouXq;oP2>ME$*NG7sc+e2cCrGS^c?lg8uizu&=+{11-+;CBt3)6uI&fkjrQ)N1r`{bHgrlb$wSe{lM z(H`bwYQ&Y@z+u6YeR{&`8R{G)z)ml{T>aao1#qJ3JOM$Mi{}8b4`fN$~dxd)`N;!8+a+jboj0#yd&3( zBDsxLdSG~2r(o+U-G9SCvi>ImtJ6*VbeN1o%yS%_!RxjsvZJOO8bT`z3ADHUj(dFh z&bQxY3TdSxo^m*yXr)A)H2-#U7fSsqh-H>v93$|rJX^l6rE3#n%~1GZ|~;@)fzo|t2Bufj?&cfVpUFvDuZlB@I6U~D5q|11X#a35ypq;-$akZ<+tUq zPra)Jg=e?AO_KR6XP5QiLIz{o;l0I7wbv|CsCiWg{-N2B1RZ)uOe^eHLixA+Z|@E3 zC{M;&Sfu4Ejl)=B{Yz~DNzLWjMNL6UF%G2or};u-v>2luP;9SlTQ^67ZP`!mxgbqW724Xr!U zvza0!^&sm(u0JQ2K}ujmT^z+{ui05aiXsbAzHnpx;_|zzb{`t@4bg=B#*@>ml!}VR zX~hV1JN0bbiXZgCCnwb0q$Yr+FgW(elBR~aJAG$qC8$6fz_~b|r)?dTu^ghgpw!BX z&tc+y+uVTSsXVgsTNZU3=Q2&gYS^##kXh(ezgk46f_it9gIo%8w)lbtjs#A23MVRe z3KBYIF1~_QN)K37`=p(moV>eSMhLF)f5}@3A?jH9@+#l!_^)n~QAS>q(Aa8=aX$Z9 zpoC7QL6SJ(p|lCzGWJ({q(3I$39g^EH9V^(JR9OsO1>Ghu9k`M_tcqYirQ( z#txrB>=Rq+{tpX7m5R3vg-h{;nJOjnl!v(z+VHH-W<#xQNwR-g>Zpm?D5!24InBma zn_+`hOvC%$fW@;wNe>0j!S@MS!>!&BMN$eB=-r#LR*-$NiAN4-zpxJ^kRF(xu;l3H 
zk_obgJyF;89T!q{V~BL7^fOhi2zor}7zkV1{`P0<+tro-wG8{sMmI7Bi2##WD z|8$lk-`#hUJ^Gy#;!7H#V;(GACoU!Rp4>fAg1ZAW8JtziJN<>DTwL-mW%;ve2RxBj zrE;@37H%azXak#O=#K8=bXIRzm`H!}vZ?J%DCuI2FIm z+*Z(#@JNaFb*}LTqH)Y1%+h`ITmdd>N#0NHL_1ugCJ0!$fGVB z#Orrd%1GQ#zVAjR+5!+LCnKI?`~kQ>6C8>kAorHyXZ4z1!`fT( zxOeK$5Xz#VCT_f*9~%a+y%+dF&jC|4M8!cgJSsQTw4oODQ&lH+f_wIRrY6&yT?$#* zOaC6OXxREG7Z%x-g|Y?JMrnDMf<0-aDXaiPSwy#IU!y7~N_TVfFi{1_U%_=z@8~l@ zSTV+8m_C}w)uo`=KMI;5^Y7&82AovpV(cz(G-&z_EnQhhSRYI%u^9J{=kV80;n8RC z=zI8DS^PF%_;i{4H{bYLnEo3N{5l-|8{_;f5ceK*et`eJUQb(&D7~rNTvd=hh2JnM}bFqgsI? z7O^$>-QP16m`0AlerAfdt#$4iAcMu!Er`T2F~SXMl2}Ox*kMihOqgOnR%#~;c0e1y z;*gTrP{YV*kSWjT3J_;(oUH}JHwb%5rS<6Tcc0`wX23$|aZ_8ELY$+H!a{U8;-F?k zSr{T@F__A-OOK#Ag(%TLe%Jkt|Nn4V5^q zMpQ-)a5O|;R`)m2&n$@ZBo_4-uDq_kG{OYO3f7**mOgQpa#H`(18wsa|L9~ny zYz3i34N&U$`{bl?97T6}0DVA$zxQGEueDpMy?RjsQ3RdYzfjLoYFtjTkwKpzYG@-# zw>@(!kSg^CU>lhTFe(cuJKYu-&U$kqxkO$D`D6jK@-8f(8IlNVz zt1Z>d&7#43Yh{<=X>n?AN9ut=PhL^CDZlVp%kC{xUliS^A`IYhSu>R56(l~+PryR* zh~8NbBnFkn1t)@RFEn@RDbHx|&1Q9|&XcGh7MWP$GXFhg$e#~LdSq@}ylD6pLi~bZ zTGR}1uo1Hs{ArH?K-kVg3o6F4aXNVP>(BotD_BD)DCvLD+&w6+;igiC4^M#>3WY)uX)=!#N#Mvo=)Rad+f1$DzqV^(w}(RRF5GO=(Lz>FeL z!Fp^FqWx-@KM#u3Lo|p(%ySkH(hx^N`d>7jDF@Npo$s<3yqVv99Yf1Wg6_^u{3dJAGU2R6}^x9ag0_E;MyCqUGy`DjaNG`zRiU>bj`^7HR?$d*Gv1n4|v_v57hFayl_**o&ljIz4UXyd^n{hA7oP+Sv)wGiKk@3Mq6Heb~AmmxBvv z1G6oiDchCJ0K#ujyL1nKMk@I(oCepw+ zyXsNoP9!-Gcv8whMR?H&&LN*=?RLc@{+)V}^S-q<`>f=j4#8P%Hjbe(+)d!3^s1xf zgvN7zN#FZ#2J#g{X+#FWn&0VAv3ONFyhHn|anA2DV_$D4L~?qVl}bt@IdG!g1b(DW z(*%?e8TUt)I_K3jAtA!$r8&(}Ce&>2j$q9+w9a5eK!2!qt{(_eRma zwS+*lvjLBemG`tVqWXz(+FJIDeO)(m0&tEdKKkIuLwgSMKu=!i__t_!&bCx5mZXrPfK;pVg*_FV+{p-0f zuTiU4!!|z$;hIBXVdY6EK{tGo-5h2@2fGIXpUdgb&izZyJ6=eL_~hEbws_EKZ9jSe z;tY{U^=~IO_Nt4BViDcgsNr#ZU?tJZT;s<{Upu?KPKsb}-H~9AR>!}uz+m1HEa|rh z2BxI#LuQXO@@q4lU<>6!pAjeFnaL#kuGrP|**kOTZ+Kr~I&~}fc3EIaj8DOcb?!YmqoQ-7P<6ZP1(0ZSNQOpdZ*nXCj2by2iG=>_ zaORTlrjT8B2p)(L>x$&sbZFEvNuNN0!im4wJM7txG@k;R+dMJ&^jwHCe~YiS6|Y~b zT|W7B^U{PjfYtw6NBdq+B9v>~qU?E#JQeo=vk|1kfsqK)C%6yecnJ+PtO-OSMk`XiH)R5 z1RH+gT!@N%;@ZT8LIm<^^=3+;TtCU{rr^;bbW~%80c%D5zTw$UML+EZDs`)Oeo~P8 zuI_NvDym#Hk{MFs;gLEX#BdrnW`@8_S?l;xCaK9 z?90Z6k9p!MHTKa`f+o=BRYe7?p|?@NIhv&{PA%}I*{bn_5tfO7cWlV1dJP{zvb>g0 zAc~2$8*JKj@U;oCMiS*zAFV27FxxL!vfcbvWBIPT=o3AmGd8e*RiQ{Qvq{h$F!B3_BH4;76UvM#s*A}m_4PN!jfjAHXI*Z!mbQCuV1YbO^NFFX>(_J+ zU_O&y%19<@j0i20jN=$KplP+;;htQ&@CBG&QZ`vXJtMsZi$+N?mXI1Ms}!5uxcAp< z`8W(}e!cQH0{gvY3#hgqsV{ma7|klB1*1IwJmXUR8id@Eub`Syq|ZL}eZX-EE|50A z44Eo_H8;Yy^VN}RO*3-JKuS?!IpR_;V`@3~1eVR*RKFOc7!D7Y^~LW(&UxZAo$Vgw zwr`MQv9Ty#+5QC2>o+$KO5h4~ldX*ijXCYE2WVNMO~cwGkVo2}o%O0nJl^D;Fx&;G zKkRnak<7KoZZR$J`^;Kq5=NV%Pc@gE&ZXo5`JNJ#w+KK=Zu2X-Ls>b9<=+`UT z(Hrte;PF)(Fjnl$dTIFgFlQ>`CVz{|ITN7J=1%B|`tVclGeDZ+kOtme)#0D7H%W#s z7b#O1%$&)&(S36xxCykW<);{}%HR*f5&G*~bFWCTFUqc?4*=^l?r}0l^4$Md5teo$ znO`**N|Rl47Rq1Qh^M8xyye2c7g@c{03WJzPHtOpPyqt)`vF?N1)r^+eImw>QxY>e zs1{^D^@H)F83lPy9mMa(^Ld0TCpo&bIAfM@Reg56oKQwt<$^Tst`et}V<&mx$~?Ct zaAp!7Q5}<*e;>9q6Iy!z2`!`hzt~I-6pPgoXhNk^i~TWZzD;BzQ-lxbWOP6pLbJ}7 zhR~VirWaDk>v~}BrB6tN++EtjXO*WU9~X(v;*?}*ivz zfI8kL`R6BI5whZxZQ4YC%URZh&EXm5VBE0Uh^uWZ{S8>DmR<~|5&Stw_-_toFiD87 zr@~UqlzLjPm&$kHFNOW?a0hYAa!16KwHeVbGRqMApE{O8H4U%h?%%yVGzBM&+z8Mw&fRv}>|0QExzjjEdlZ7NaK2&&ylp|;7*1;lHAZo8ADbQi_V0Qkmr?*c-WDl>!EVM-pH!4g(Ya0&5_@1BxHov@aV2F z>Brasz3^#eH0~HESDULTh87AwBi7m{<(JX7=kn(aypDJ@@iec6O90gRT+9>u^*En* z#y{$U!JsT@LOsjLob|o)pH1diF(f)umI_bz8~+0M{;|dxWcM=)=*}xZ#xRM?z+Z+K ziP4P6TxmxND}lPHhy=%q;6O#FT7B}M8R)7}!e+~yPI%G3+_~SQ>=yqsi6pDySSldZx|(e;=sj(=!;_8{pw**D{gtm;j3D0kuOwtN2(InIBdeinH>r|EpT zlL6`z5r^nU{Z)KB{}?M0%O$21dpuQ^yuz_>K4!B^aB5d{N=z2SAaCantf*M%cF 
z)Dq7dD7+*02W1eSv*8MOMX+mDxlx!{>G-n2ngJdh@zRL+4Ugk-BZmD#-`dn|ug$H* z-vX}yoiROmw7OkL9~s33PQd}!^(|v!sx-a`dgEj<5FbB#dc3w^ z6jMOs;0DgenBsq_V80k?WeES8&dPmpOWD%r1$&ypHr^!q#koOm_2nBFwO($Q zU}43)QYvyl=*uD2_kG!z)$gHrRb}_L9RnXd3@>n4ysfWE0+vP(m{x{eYw3Dz zhBkmpHdHTII^z}lqk0E}Ob0-^kdNqgdr33Sstuyj0zzOW=`%*|96LCj2I2VJAzKIL z@*P*lD{utRVa@$2JQDK1*1zP9IE;oBAe77EYnRqOz71c|3HkMj^3%`APYec3(_ip{ zbzbYy-;>{L&D8Fc`yvHefOfr#ff)1>v8rZdH}uKvg8e$Ndtks`!%xghBv5|jK<6K} z6m+fQdvKCzTV^(!QD5mBq}-k^z#A}=e+1Z}h`*-T14Y%&iUGuf(_<@*_WQ~spmxxK z#3UVT@Ui8apoGr0!O*be>>jqP{x=FFaY?D88xGQ7INatw`alB zMQ$vV8T37Gs12`q8$3C*pXlFH7kx>ew*P+Gs`|A0`m}rY*uU-6`|3phH5c}^e%m7Y zs7vai8eg?{zSc|a&>!1X{@okC-2(m==Dw|F{iu5SlP{^)_SnC+fc>;fMf+8|?MlAe zhxXH-w?(hE@BS6;{kn1cPxtLY{j9&Xs{O8CwcPg6HfsA(P+I9FK|Ys1-l&4yl7ZnN zGFm;?4Qp4_sB(7AVU&k!&fzqs_Z0o2HWZNwTSvU|_iO8O6@jRiaQvso_ste9CIJE; zplVp899-Y*&eHo_x?gJcW~ilwW2aR;8zy&a?n2{{YB8%VaZ_ z6LML=%X}75yZS>Y+c`%L=sY!gR-Qz`6ar=)OKE~N9SQ%4KhOPblp92tn4ukpxw^4TvG9UrF3epC1^g-mWX7Z&U+BH%X~|Ttrhh6b70* zsHrOtAd4Lce^0l%g~vt(rpLpjM&t`X_=6%G5?1~t6s!F>2JN`2g3teFhWipJ$0?-Avm2j0-FwZgm9%6LF;#l9S&8{J4pusJTt z0|)mbZ`i`;z5T|T?2k~c;qX>j(bTyl>uz#JihVdVr`=J`M}ssaraeRjoB620;Xlhf zt`#jIMBrPKHBS+=s?OrcJSuo*w~gj&3I;)L!eB2fls`h^_6$;O=_4x%UYtG_+P@D~ zI+KF<-%S!Wbc(Y*_W-_~W+bmX(2m*U7MQ_i6tJ)W&rif%-MW($VjS6BL4= z^8h(}qumiFLA-&UF8OeV=e2cQ_Feiq$pYloLo7+&_#wSN(32WW&pS)(=yimt@~Yd` z8ZU$EgtQy|6?@=tRmq@`Wz&6<~uar^7` z(Xk=rax#QY<2IYOl*;G45fj_!#I-M#`A<_tHETP+1C1AqHeRal6@1}Cs<>>oSZq+< zsdV-|Y~SkuVJ81RYyeZo`<^=hI;SdY?&@Ila#6V_i4daakvo+EG~;7OEMm5u`eOGo7)j0fZPLSmQJw8b(hWt@}&3dfWAwwG;^B({V3MP@~O%= zm9!YS!R8`tEL$JjtWs;hA9({CBD~IbRTs~r#zRxrw32oOE*3-EnT5+LDGRRmnIFVc5LyLc1mEU%Kn;Wh*--N0H7-HBc)a*0V){m~gu?XI# zl^sQ*^(>MgKwk3Zv0C0$BK@d_LEpry-{NI`OU?U}) zOD{`!?9}@rB|6p5ww>!LWHjI{I}9 z%A`romQQSxGMT;kq#P@X4iSZkq|nO53v4!GD-lK1E24J*G|ZA2S5Ysk#*G;i;v8J6 z{`l2*32)Sp(CWf!8a;&GVL(AkwK70n5x6&)$N;qQU%=eyagC#1ImPhp6bgPuZb{)? 
zQK42@Yb+zU((LZnjnQ0zB^YBE+zo9vn$SKB%>#&DXTbBznR|}HSloT<{!kq+y~gWo zqByD5g^6U0=uB0RtZbg8>SmkwsQu}fi+^3&`GabtU^gs1mAiHSNNH`?Br*K1r9D2j zDmj;2^5K;t>*8f372rnOs{FaNv0^HPlQ;tIy`I)8sZ!tzwKLCg8I#nk8V7PGgO8?H zfO=v6>||F!8?&jgE#IM^n!)|_Ce1Gb``hAaG^zntq2OrpbFMC z;UEq)%Mx!V@or-!l|rRNE$UOkY1A%&WmF4VS6;<)NkI{-_xFC$7S+p20s!uW*AQb7 z?bgT+5B>|imD<=0{@z2HKU!;$!t3ZqPO}l9Xp~gTq<*L^=xW##4F6=U{g4-YZ4pAI z9H6uYvpW_Nys9;6#cisnQ|!&I4ymqVsaWq&vl+k;H-b4R3Tv9zd-q!UeDlXcCluJz zg$@GGxmq%y9ZB}O# zwO5|mgou`i1ln^yS|q0Oe@G>3HLk{ui^yPgG*2#vOZpDH>I_|ccArOhoZ$G8e87Rj z=25$f+~c=Pl`?9c);utUr(ISt4wl>Qia+s_duU?p+pGy!<7@;Xd-!ZhWk%kAxs$G4 zD5wc(>vFOCFv+|BQ0~esOWmZ?gBEBj0 zct{Z4T&SA>e}P8ebkmsRfX5Fxe0)o<^IN*gjbwcz2Oes00K8o5=nm+HoDAWU(KI~ zEIR!A=)j0DdFir7T!`UQ^)3qx?J-_snuW&2>Tz;E_QNW_rm`7BlAH%epy3Jmx_WAe z`wQCbZ4~>qf)KAwohUzIYlFc`<-QvuUiM zuCP|Gc@Z+7$lwPUm_ul58BABRH}p-4;i=J^m?~$S0oudNk&%?xzu;9ubVdIQa~8tR zG#*kkT^y2}O#2ESxV$tWnECX1UvB!v z(?D%ZOq1pGl@oGfTjR2@)b>ys_k2&TiW-Fuuz})W+%%6*m9ywlIlT#6`zw0NNvDwb z=rqaSEVTsT-=DH!wE_9RDai!r%NW5$H_QJ3+?Ssvh^=Lnu6CwIzK*C+0%szE1Ntc> zfNNlSRxFTXKX{@E4sqV8N8<$5p5f6KYeD8)0PV`p)?d2^Jjr1(C*Nb%)ZBm)>ONM5N&`X- z{r%&8E#m{qlN9=;iF{`bBDyPfmzo$*nZPrAgxzwz2^o_AFuX;m z9gb~88WO8)MV3?$oK?=d5FtrJ=Lwr#1XX+}*w1MAl5nb#uU2JE6FGgXLG(!cmz5Y& zx{&oaufP&hs>@c1M;zor9a>8q(2N{b_>kO%1F7AYI(~mZ(QmDw4U89drV3M(1aLGQ$jG8dXlahRBDAl!+XlgA6 z*eyI$mU2i5Xx+=5=}rQT?)hNQ1a9sC-K|iWVgqU}VC&-xB2P3IrLK)m!WQMJ2v|>D z@sQ#hb5GlX0n+VEQQVcKr}-8%r?C9C)cr42`<7uIz%3#hpYB~)T(ceg*IzZH61lG3 zs#Awp#R@)}02Y6)-OB;|Lt>?fQ%jDYo@$#2+^-Hx#8ni=L%p0x(B3ZPM>f|1me270 z@=z@0*r3E;)yr+BrAa{ z+h`W8vTudB-AQ?o_MNT2-hD`V2CotZ#Ochc$hpFJ^;aP02TFWG zE0hDbuSo4WlI{~)5hv0WuoZKix};@qywQ;n(e}3P)Ky;7zSsT!p7#KUNUR8Q<@bV{ zw}pi;#OUsJrL2n1x+>P~Vyc7|vVr31bpCsL$H9d}8Dn&s0XKiVT=w+W{&)sNv z^s|YAz}>d(K^P>8Vo z3fz>)grhA0C-U?8n2zN=dQMpDLr{yB>4}Mlce7UVUai0}?7?5hp=DB=C9L)B>%U0L zTSI7b@M^prHPR+S_{;wN{pWu)V5RaEl3q%I_%CMR-HZ0J6xe`Wk zk*Pxpj~!$WeVwY_Tmt|HO{<4x`~vKIkTpqcAOk-UHbxeV{y^<3Eah{Lh;8%*u#Y&eeL2TF0p&sM3LqlTU%R4 zoo4`01-1TNJ~YVsK663-24%r|%0!$kQFSI6L3wh%nG4S?VR$V_0zSrUKbRcA0IqH6g4{bqPQ{pMTC2cpqQ0fO>u z!UKqOp!Gy;w|K8{Y1r^|CN;eC6SK7nm-YJ9X9|c=@ZTJO5`8E4N%?Qcw4^;HBYFb{ z7AR>E4M~=0=6@%Wq_9O_v{QOJ4dtX%#t1J}e^yj#DRYG*p{ znA>C@y3*nc%=wEG6%(=g)dNt=xVO(IJ3NbnodRYWM|c^6e8gP4)i3u67ZEphF^pcJ zb$BLb`1mhP{TBc&?|8$p2cj4Bmi>{4Rvy^yJ@0<2k8dXlijC_G40nSI7B|)ZHL^GV zB>xH^={ch-LGQXI;vVRHvrEslTTy;Ge=^4HMxnf0hkKmH<;vA;#~yQn1Ap_6@ImrK zNfstna+>v4F3oDAO*$}L>GWznA77j|xbA&cUnqz$0R-6sY32&g@_-UfY^Zp0=k_9oZ0jlis&4F08Ow80jlg|vE?q+ zubRgiqkI6Y#GJs#5A;KlftaI3GL22o9g40nV}vU~xT;~=>?@!XkKt%BepuEG$jzc< zeaz7aBs6fcj~*2}=98t6=iz>$IW~|%3I>>i)hD>m5b31i^!>!r5>wGv#4Uw~mrgU1 z<->8;2ewM0^Rmw;Ie5ILo!$(>5wXk96@3@6JSB=bH$vx2pS(cJuvC~B zdA9b#%_eH$05%#M4vmAXzMmx85*Uye7ucRX455j;GyvQxDN@!#9m>RUYrejGO+WVA zRIO8KKE1um*tAT<0^#)eK4ly93vNs05i9Iw_SFF}-5+VoP@Udw+x?x_%}67BML5fR z%gB0Ui_U)(Sc=wFdQ+osX!tU!HiqHF2Cb6~&~@_*+DY+j9&+qIahuXYF1e(6lG*(x zc)J8LLk1XIB3)uNH*Qq}rKO!BD*LZ{Np@gZ5^%CM=^u$N%A5#>hKbV~Q~l^ueli7- zEC_;d=qP~fvzOoEGuOmn3-oD=z5Z<5bbx2_@d{fA4QjcYR;DU(EFX6`vduSrfkv;X zbZyNPDWjbQem7Br!^4iWHXoCG?+=HXTcBK*f&2+wWAXF9m$Jwn{Pei63mto~g8OjJ zck`_KP`!C8+3Q%xB;?#?PS1DTbFP}RCTADN>;G6&64#SaC@^~&Z zb>jp^^WD9hEM7}Clok0~pNSx+1aK)7hSeZ6R5(a)IxSBYvuCv;nr=XF-CJGPAl1b> zSwi7*fB${1N%Kb65{h?Hcq&MzA{k=<2LXa+#QDO817ydZWINzps%S`*B6ZtKGJ7#v zQg*&2XFhNqNI=Y81|4C(^!E=@%UF**7&j|O`zxmI+FNbur(tColBL7*kihMi64t(I zE$il?FAe&EjF(2O5S@K3BPE*|D}FY@uzU53X;WuDCWQ;1^RpB+*BV9UKwQiz%erI~ zLX9Z&0@9pg7E z%4jXO(|WiZIT8bh`97`*rO%?HTx|mXerGyT9{z-+G~5e~JHb<|9)<#h)nCvnal*(O zd4z8pQrFuS72`a!Rw5K0xmCQBU~md=ZSe0|P&xuRX0>evReSFpI 
z(qYeBFsR`5bHy^-D7%W{lt2G}mu&Q&9<;Uo#PX<31e>Mlk1vezzXs=u=YKt)P;z3F zv(Ly3^nkgP1PZq@TD%IeacciWE=EUrPZk{1QsBf1pqCpFc> z!g&mny5=D*+W&C^zb3LGpXOv$A%9#!f3IQ};q9vFcM7DFknf`#T~Mb!$!y=W8a4&w zcPt9^Poj#@xhvlfvX02`rAe1FR7tNItx=KH_a9h%E|4&OPiDF6?#_0s*-Q704_(p4 zI~=fkyS&cAB0%0t%YyXe%WR<64c|FVOuhK%lkkS7ZG2qVPHxzB4~0fOw&splKFIKr z0@gU=9(hN=f$rnIB(IM;23tToEKJ;JbCn%vYbwj+*5a7GiVDeYF^EZUf8;=1_B+*f zSk+WND>*E#IOBZYkTl@&e=syH%`j;{(`{_w5vp&Wf!Num-)w6KT$5wXTGsR#PW?Lm zI7#`%2l25nz|I76gSZdXGD!qh z7mOCet@4oy z$qfZ|8pRY~h_JdnwW*05i2aExPov<40TdOb%Q)PWn46Y2jA1Apq}3Mu#HCkG#jLG0 zQqXv1E{gD$)(KgA4GSlDGeSR82CQF|rsJs-;(ZraqHI>I;=si0yt{@BeJ2t~v6~(T zws}T_3?4I#M9knBd$cY>rn~!CdtV}hy(?<{%lEyC1ac(?tVg}HZxIY>7sRXCJaTAg z^VzDnU;lg5HehCraWc$bC=*B&A^%$_ipSdw-ohXr}P*iY**Bv74259TaO967Kk%#`{(rfAe8TZ9O@M-~x{o>ld|fx7>U4OWkw4Axqe3%d2HwHk`3bi*wqP+W;KZBItaESXE zM&wzh`1T9}3mPX2^#YP9-F&W0AZA}Zo+&d3qeh5QQP-ae)9M!&Aj4USn|Das$jl;{ zHh^-d?j6>H7_O|(nsZxr6S%uoDzBOhV{fBN7q2oe&zpLGMSF)e6e4fW(9(GgQAHAe zdDma1A6x0Z*8clfzlM%~3hMiHZu@li_*&oa*L&?x|3N$WIR6E`eYBPK(#=1?fBp>L z!$NV!Ux}v%4vIyhY`^JiI+?qEmH+ zk7<83&0toVV4jqj{Wbd+3j_8Nhs5AJSM+Vsn0Dy%Bym%~Hau@}yA&?#dOu4c?Zn<2 zScu17A;B+2fm*u3v7A<_ZuME96SuDZ+czO5=Jf0n8H* z8h|=GeRLjVadC>htd-|PX!lu1X`3J(;~9F=RvQyHvzkWeit;R<|9jI`v=c;8MWZ7X zgAs(gPOapQCIVoeXXfL#w0~%cd|JrNlycU0d>?bKw1IWn@X@XPfc`*G3QviYi zsT8{H2lE9lXHA(oyT#Iw3K3MPhc4@}%LNRl%vwAU9>ZZJ={Tvo`ggVVa;Mdbj3*%qqA+SaYZV$_n5aV52>DNE;%b~S8?~4jW^k5Rt zM1OWd5x|oG+Lz_l&+Ct2O@*z!=cQ*UM?vUZ+ELj%!${mGFje0xLT73OcBA3bz!mXP<;~l2hu};+J0WmnbUBc+E02ZLktfG^eou zEAUW_=Qj=L?RSA$lnLQGzfBT0bczU+t7)p=Z-gKG#jY$>V5y_93RK#o zX$4JT+(RY*8*cR=7UiUYq?`{Wr_Q*Vokp;6A!k9z;XAZdxjZq=w}~L}^;;n^l6P(g zs-fNh7U~kD)hO(;wn1Jrs)o~kIfp^QzB^Z7^jc|p7H;}?>gGN^P7QraVZ;%J8-Mf+ zh!d_KdCI}Quk-&ICRgtL1{+FTr?iB_zgIUR4VR8X!sbrbc$d6_P6eQ^i^L%N84Fcm z=_%XC_wd?Rj6chuFP@@nTcbZ*^Qz=?-1y$8Oy;&nw=2{-Kj5_87so_oGY8ne`l0>F zwUP`ss2fpT<2+^M|9sLyOK+5O9aZJu7I>VULOz;0vJil~=y45r%D0u)^E+KRQ5By; zZOH0{8|(fmjJ@pK&cJawUCFgiUa@R^AwBF7ua!ZP9%t0sXZB0jc<3rd)0tllAgexS z+pJP+z#n-78zR5cvz!2U_FCz@a$%_qIjRAMxb|AiT&T_=#}c&%^`$3TklRdj_mw z(h;F!miyFc!A}yV>#}_{zW}7B2G!5@EbB`VU{peWYLNF0^tWg;vm~WznCHB%winNR ziAd4Pk2;L{`sj{TaPt`$57eAW?O|Rj9NCrMK|)3B;G9HDX{T|L%u?Uz`0W4=0A}I7 zLI>gPpIy`VHe=&67?7?4`HPcm{=~ZlqYlt05|<3>k?d!UU($zm3xn&kzE>YmN8&ng zES~X^Uftf|^}=Emb>WeSISc<&Hr_rjY#W>aW{bdA|7H;>I(w4HolWM2YNir|631ss zqYE|l$59^NqOZcJV>&KQeuRP7Z=BFz#BJy!D7rt%W)%98?FCE(w(HJ~S@TCAII5s( zcaj{bIbc1VVN++x1Y;~M`a5=>Q)lx>n#@@`e!{^aXjm=*^P$0C84-jHnB1V!b9HT0 zW2%d#>>2_{1Mwi}sg~U2dZKL|PMWe8H=WuEal!>>hHokA&j2JLeK4V_9mRTOFLdzL zr<=FQrj-rqF9}Y12hKugOVCs;enjh2@5!k^7bVdt{(jy+7h# zIrb|)$Xxg8M(j+G*L+I8Db?kFfnrX>Oe|{HZ+yf9Zzg52+q{w=NIf`uw28^cBXG3% z@GiN4>EJoad6-U7^ucLrn3Bpa;-_V!>kI05tf(RVEHiGIGCVE9FAdC-EJskubdlpu z#4_^<%ZNL4&O{RqXwOR;*Vo_y!0q}s2bw69OTeQcX3E&X@n|EB?y6FFPuKZDKlj{Z zzwH-mDe(|jodgo)>BoxgieJUPpFi+al1t$#!$9YoQCg4(A}*1xb%*zN`?*+SX|-p0 z+s<|x-BjZgK#Uq8)PaRp#VO~tgttHOP8vLroSLhyjbH*PG72c+03Dg|nvytOsCf>6 zG(vxowChL9WCsn~Pue?IxoTk|>$SN(Ne>Wt!X;nId~Hu__`r=ncApYBY`vY3P7`1Z zyI6OeR4e(n?oJGAQ<}~N6xyWAN(o88w)%WAp9rX*hNrN#0;Tg5jGJ-T0XEP;%|{6V zeFUdbgzV}x`&(I3=u!tNBe$xN#4#55ATbs_L@ho@NTo2wXS(cGRwNd{@f+&iT%Ull^49G9Y)X zXThXSz>8&|-F}O~08MiBt28+Dxy)v1IQJ?Tn8%aE;%E?)9fJD=u`Tn-S950{Ulsc6 zE$v;I4GD_A+FL|f>?$dd0)SxK>t_V7*qE%X{#(gNuih3eWRfy2Wk0 z8i=p6laxiTEHpYr8QQdWp5$wm0Zyn=EfD)VL?OQ z#fRFG#auF~2ZLJ`Clj3tNsGqXpY(i)nU^VD?}^FbeiqYO0`FTVVy^g?_O-tyQ`Bn! 
zGlLp2en>J%5C24m23ielNA*qVW=OLWt+;~=-stApTwP%~^FjodQ!`E_J;6hYl-XAa z@DD87biQ<(b6V8KTqPkx|DCZmtjm|Y_#+E(wTB<>Vv*i2rrHfomt zT40x=RqqR*a?*hP|=qrw#Z13Z!w zbD@s9oMi*cAS0ZseA?l!LCc8$OQ&Kht6&T3%(o-ESx6HKjJTRM`EWtV0vRQDT0%Ep z1szRf(MLg=uAtp6n#o1JACT}AW!{XmN-Z$>{M(VqsfjppyYJFtBd zbGGe@xls#=RdnDM?1fU&BRHK6%{a2C5orZ$H-Y0sqS8RVf!_>4?^5R<-U0mI7LWM8 zr#FEs$|M8az$in{z+!{rNGpdKx(>y592@NrjF`7bZutVA%+QQtvEyh;SN{<&gvYe| z04c||X#Yw?-&nGai7M>C=x^l=W|1!7tAksWyA&@He2(ka^FB9tOpKd4NF~3?X%8b8 zP)Kad+`_&R3SeRY`>3PpLFF~+sm?27*~flS_{1RFaST^A&C89aB;LN6KWftsPA^9}ESx zD@e47_5Hp=xPSmld<YGO@A-ad`FfIgYj9O{rTOqf%7!f<;{jXdcd*~vlAvV5 z@8QB5Az79Qm;WjVB{P18cCcA1e+6B;xOsc!B4?XN4Or@ar4>cvzK`(OU*fk7NxgYu z3|Jkd{HP|NHhalMmlbT(k2_nA`O!JvJHJS@r?*Ze{VV{`Xs9qeQBa%uE2++2no?$Q z>E|^?E>J{_w6~DWZ%|7#3Qg{L&jZUZ0OW|7eOg@OFVD(IDJ8!q=(wZU8gt1OxG`{k zR_s*X8NCOmLw9Bg4s0ui-$X@~3Y-tKnf;V$zvvkGRW=&qw`AU^TBf z!E?0SoLUOe67$u*%xX@TQUgt2)?)u8DEw@;F;aD+2@ob0K@=lK$X9^ zxCF{12)zG%p8c3;;L96OX)Up%HjzV+f32?y*QA$~(b3i!s5eA^_6>4H=f) zieY-0;{mz{y3*roWXX;fM66j~)aTdSbn?sQr8j#L(i9HA-i8Cs;T8N55V2v*LQZp# z8Xuv!fC}G1zevr1quxL`m-O?b-Q{FqZ!dSI`}CY}=u+yRG4y`6R)ThHdosuRS8OKF zdX^kAW9TY-p7^@4x4WNO6#l8Jx|gd6N<7C0TYK7HZZmOAc64K?FC3$!OrR3Z-)vuQ z0Q@T5J%R%bVNyr~)|MVfyvc#llAz#R`vcwm3?i&d0jkCuP*7?D9(&p!1P)}N42zOc z-Ngp4R-oe6Opp z%u0*JVih0U?eI>FBdXybvr44D@UHIkGCgNK2N>UfE%BGj>u~mKRUF^a2A32HM>YdH zGzWmN_7iHTV?@GIMHq*wAwm5)$o%WZw0Aqq#!(@W6ex){&?w(xMRz@mT71;AERQQz zLDLU;2Yi@KO|m{){EqHn_+>T`1uXXk!&071Bo_Us?eoNiVm-?M=n_R2u1K|nr6CKD zWniv?`GLR7R_U6ts90?Cav{5?b~ibh2Z_@8Nnz({Gdsfh&f?NvDTY)9AH_q=(@&{_|l}G<006*wVPEx?>ZPQwC_-mbo%E>&I~j zvcxN;I@MC_a@CRbK_L#5KwvoJo<_A1vj?ivt3_Sl^UQa=Q{U{g!PIM6^I%$qQWyEm zS8lkWH=Sh`xr()~HF&mtgiu#&=huT^rY)K#-bW0}ie-fca5p;L?_1YyNHU zz6NGFzF(&SDEH5TLO08Y*N>Qjs*Z7Z=*0B#Q^2$S(!25-rh0iP+BIkHdYdM+v<#8B zqSJ;aq!uC-_N2TUn;B!ZV-_7z(ttqAI$ipPMx?Qqzl)Clez`DV5>pF~^mITY zv=+I}1|RhFs?~`f0&6vDv9GA5M{>~MY9s^p^n8YB4O$a8WY6oRH@ArDHaa@&C3^CA z1n8+$5?>CJ4k(+xW77Nmv#5I=sI~HLku71j|DDKJyyB4y}f3?W!(=& zHi}}hkbvyBeG-}vkCc)F&bZNj_Tytqn7`l@$gG*;q{OM1p0(6fCzKniRou8B1$gPL z(SkY*TAHD?3V_Bbj;xPSSS*GvGdeFpd_AIYm_J&s2Y1Ddvb#WfZ4jIL0kkfe^Z84| zhVpZ~kheobQ%5quPARyeRpl9Ea}52bYy=$7+;lMv{Qj?|HAB`++Ov{# zi6!Z7$q^yVyWL=~|9_9N)i_EbXedzrh5!=|WGF7M)d9@L58dgexlknwD@W~jvi>IU z^Y5WbBok60>a z+Z(^|BCdlnLfx1SOa(D+z&d=wLBO(0W{1;yv4P>BQ+6B{?X{zYECY7;)`Mrjw*J2W z%8?Ba+OAPw9tRKk2I;on4J>M%H~{1yWbZQm7#BEc@PuYI{6ZG=n!%VGQ6QZ^rO~Ikzk=>Im zE&$_~L2;?dB>QnCl}k(4=dV8K@S}3{>M4xm&C2q)%eO$78^rXUlgl502d@3Lc|G_- zj6-Yod?C4akp(=*o_s>qWQA&rnPOF3hojTb)kfYA*v-yDSzM~BQqZgPmx+Vh_CNUf zQ$PG@$yvu!;=SxOUn(-qKwgdCQEy*ag}(qH6YM=3`)b z&Mi>2GlWBe+tw#H_t2q*%$xnThS*IY=Sn-KiSKA!TVb~m@si|~?aQ(yv}3D!+!_QU z6+F4KJ0T6h&jd8<$rWH;4PdQu1yJL%w1SH?TUEwcXowDh0czX^U=V|H0OQ4N#Wvn& z=pFhp+5F?c&2?ed=^Ko~Dtp4mL)9v%65muizhs{Ci4A^lR6YMi;ZL%he9VHq(I6&L z0_6;SPLf!=i9=LLY2;q%8t*sK80yq>_2I! 
zYXXl5BtF1EHQ*zD9%;<<{jq=d-wKNIX(_RQN`2K~S%BtsTt|=%oOA5GV%$N|4&?zn zS7A#O)2;!$=xPAOO}Sd@)|&Bpt|N$W8QV%$&lz8ROD9uR3ip+v%yb$kq8Ovb4|)cV zcPTNv`9VoIoi63le-gOYcrP&Qw8}q+Du+k+{5iMWO8ZVhh3O*>IE;jaF%_alj@Rzv zpbzO8Q7Ci}`x4#XLzsj1W(BS)Q@NMszx0-n)t$%K<(eHE*&_K+ zZAQr{8WOa5Mw!8-`(SMaQ=1CMzDdbN-N@HqUy!S_4!tU-Kav(V#O5-qOiLwiToZln z`=WUQUCy2LuBGiuC>Tun^;i9=m#l;}m#)vXDVRi2+eXI|YFra37UP?@uf~~ArdRB@lrHiwZ#MQ#&F8U2KP+zk$tz%%oza?4w_3px8 z{f&vT#mv+V>YWhrw|3bFQ}n+%%+sra`2%PLme&Ou$W=EOi8n#xvl!6_oMu!yBumG+ z5rkXJm=78aSqZJE8Mlt)rX3K!-lf+rKlEHbSyUVy2WZQ%e6_6lT)9$V=1ANE+HUvI zz)()d%l`Z@DHBfu+Jcy&S7lqE>N3=;!|%CQ|9|C4#-F@()7q z^J+_UW=UHNCGUd~EVSWy>E3ls)zX)aH|#LCO`C|3>~jNk-&2IJ41c zhIP>ffjSj9zePAak3sFLQu@b68XN9SCq)Lm^yu1CBfJ@1>XMio3O9ikqwobXJdEcM z4nkD=Sc6~iR2_;m!*}=qWk;h$dp^h#b{H!V!PQ)oz`_b^v(jiYM9j2{d|MZ!Qji3R zrPmtnYKzfy#QQ+0ok3Hl^~SQ=u=R)QrKhN;NF&AWy(96Y;Jc&RVwaR9#tj@x)*J?W zL)}9PI&BH(lJf)bM&sMv&By(lOjOH1}D{2%QU4=05& ztfU<;PE9xEGKlr0cVCQvfxjU?*?48MZ`z$1&C4PH0c4bE{}Q8dfGEbQkj}3{<_-pm z=I5`+QY?Ur1{`T9;n$V#2gFwg%bX;etvJAp8VMi}Rv11!bxe%OP0qGj;Y$pX5D>tn zp)8R}VpT{xRzlXXV3PKH_$sH=HK=qB`?xuqR^coFpD{+u1MybVp7BN9)R*X$eM%9I z1av6%otyXJVHbzB=b>F@5F zbr_l`9?L4e4P3Y8$j+vqAi9x@Hp4s+;#$PBO}n~6^ZR`df%UbG%l%uTTQcRfKDsN+ z7x_<91uA|t20@eTH4waSXdjL^ZtBjmB8ao(gDkaaVWNk`CoIVp1-kVsy%mhczvG<9 z?&q&glH9(ssDZF3L+>VFF>RDDFI>a%b_p*Pfoo{-Mig4e`cZ??w>GhU>1A|;>cMpW zK6pE4c;x?b)PtH`XcEmN(r=neas!8a-uX=xKOP6co=Wt%usQh15ssO^sDIn6Qft5; zc>^56;1re#tad&!AK(?G--}F&Vc9m+{$h4LR9?Gp&eI2wFSyD>aj#qLfHfq)5?2aMX6SK5(?9re zPL#{tB6_p`7S{R@&??Pd40V!$DTvHTOvx+>C*pN15g_F$W?^6O8*+EhWtHm8{4S_D zi-|W1tJKFaX@eQ+LQ!-w5Ti|Bl$QwIKcuPMY+rBqZ|{Wz>S}H~OJaNcrAn$}&pjp_ z-4ySlqqGzq=#N`vkxRBDEY=vlg@uDB{V+I*IE=?&1qV!MaO<~G__{VC3rIg=1xC> zd9v&LQ=%72d`K5)cF!rAFK>f+_PIz5S$X>Tn55G1KNidoDx9Yq*gSWX))+-peZZ8d zaE@kpivUYsQzQLah55p<_^F)$b<}OqeeavXqX3HqR@SI^Nf{jE+_R}tLp)8#=cfeq z2QAU#76suHWKy5ZAeWI3g$Dkm5o(`qg|iE75UY^z;5N0I?RaVWYqNouiK&tK2)l2N zjnp@K<`%?@Z8H{ylaFs+l!zvG(dWheQ*R;q%8Z|bTC)oh8@&DMI#0RPtDo*|9K>lT zMYC)M??NS^HyhqO#k!kkg>(S*1R}<|Af0vp61b2I*$)uSFtl=^*@ubWcZ3mQ^QD8n zC_vV_Je{nMgub|sXc@h;uQB-EY-nS@QW4=1vR%d0S>vRQ{sk_p#v^WLw?oq=x#lo}NOg}j` zU_WJx4rmlH%jX8jMRE(>yF$|J5q1^k&Da!*CI_J_hxC^A7oZ({aiK4U5qWJTtAkq< zJYeVA^t0U5jmrSY$ST>}D48il>6T)$RI}utuu{*mI%!w@6JB<|f@l6FFS&=P;UcGS zaF28tc(qhB(%aHG8O4{k@D7n7Lw3BXhsLEb4+tD5sEd|&X!L5C5E&GsYegw5(D(tD zzx$U$VWh*G#-66o*g0N}!gic0`?(PY0V~TzUXY>((BM%|Y4 zv*H;GYPOd9N}r_hOXUYPQXXtD)teMp>ou`@hGdO0!CJduQJPQ#1il$;^9o4vK^As{ zlrozyTq)h{r4bTLp_>Hw;|TuFp3@hL?5D2e{A zRv^uG^UVvD6laTWPC<2e=gf?v;3GxM{D0f3=|u3uw3P7PMX)=xmm~9AyF?ehW5FE6 zrcFVg-QFTVqDLPeRy4ct<#QCM)dh0{G9$rFd?6>X-A&sKjsE(bw> zF&k$hgRF1M$8vZ;W$XbaMTp)%k{dSt6Vo)0?SYpYd#K!~t*CX`UzYm6; zhXCW*;kJp{7F$)TLHrvl0v96v`6CKtBSzsZS{f4OsY8+nJxI$+onQ1sX$w_F`@l?8 zf1c;G%nvh=vBg_mdpAEeA#Fj`@XFc-WV~!L%J4yYD$`q8?xLTyI`KHl@sZb9g`tjb z;kkdWZ;`a5ozWfcreG51guAY$ej%187~1$EgF@Ngu0p)v0-a=vInLtrLAK?pSK;DS zZe#J)ywVKKKAPog3nEJ$VAuIyu67WqM0TmMI_nPoiI{- zMDxFy(C-)k)GN1K)X{XX5o*8OzF(mjT;z&*PPB9f$QquV>?17(J|Kez2muGO>5=YW z*=?^GPY1Wv*S$mCuH)AS9*SN%gU1RPzzEYKIK@7Y&l-IWGg;$kKlGNLP({OTmzg4kvSKvE)>2-_i!^f+reux#kUq}xX2UETOL?~8C2kRi}a4b;C7F5;qx zXL?IVGz^{USP7iKLsal10+&v8wZpk%Tm+S*=gKSB=694J4+4YDws$HygepOiQT;a& zA|G}ol+Rm0uZQyqJ_mt_H)PTii6u?!VeH|+-#tL2PbjA^ed;nGdDCFQ+Z(eTKc<^3qLwEzC30S$g&>3SUR_BL0q_Rc_ z?8js;??MP1!)V8B?FW#Sp8tJdj8-m@9Z#fdvNuBn^R3t7mM`KuB2=Sx z-2i7v0PhzLQ+I&a^w?yTBR)w5+}~T|#Z@#&)8`94QC57n$V9O#^_6fiMeJQN{MO!A zGu1)%OiEDU->uRZ>+*CH1^5e;6@D&FJSER^%j$yjLWc;aQV|gA<4dhJ{u}xaxk`T5 zFSb*_2F^-9a}?=1d1asC=pDcv0v#^&;PI&@m`Qq`dyXRYRwRfi9nlPV3+QHPI}0&A zC@=GuMTg0H3NE7yAO{bsvVkbN?+Cg>G|IW5ReQok=@%m7Sn_#VIlZ8K@Mpg8or3f2 
zF(vwdfb>CJbr@=k!P#0ETcb4~r#MCCJGbUzYDLOdk}eXKOzZ8NmK4fW)VM4}h3RZ>@bdNMwR>SCCw{Z*XzpWNV1d9s|?kuqhX7S*Du3?#8VX#8#l(%+$`$=SjpLqRy6#i*o`;N@U`3MRdybkNL85W|DXTV1@_ScZL%fIspm zOZ#!aRS9nU3#o60nk)>>^I=!8jQ0L#YmzZ_hm+|H2Ni`4KUtX`ys;`O8qAlzs2CGs zrST=_BFD_(=xFu7S3`9qsvXcu%2a1AoUpXxzOM?p1Gp*0%?UT9}SF}A;+K5ucCOZaHFuk1D2`+aJN-OrD_frECdKrd&{JpD22 z7pMq7CMYDUo1a5)xVW{u*Z7nV|8585X_m^9*x=n*UXB9$G>7zJ0+C`6)Zmy{7gI zFHOEA0F$@-z<^%fnRwK*5k6ubsUJGi(r&!4z__!Wx zNX2eGQRw@t2Vcii@}~6!fK;B+!;1NVbffYEoM&tI*KI83waD;sBvZnP#3ubt@Gu?lgK_h2nRiQYTr`^5y;;-#%&6%T;X@Tr97j642bxI)540$i~LmBn3XB_Ti1kecrC+= zk}`!|{~UxL8Ui}H>a!bWwB~AmS~y$T%N{dfM+zFW5`VH`p2 z2@iZMpoK1=cFdc?$N||0k0!|)()3!1f<(*cW@uI~jn(b5`%=20JZefP_iPEIwb+N* z;-6B$!)@SZrLPoAfceukpVb8TfU=G66h{IG-gJNq`@syDLg46mZ4#hj)B}qc(q_&( zfaKb>qmIvn;i}N&X0Zuu8e)#ium~!gGa1rjBp_KgT#*k*npY71Z=G>oVU|7SP~Zri$sI@AngWpo zZ(G&{CXk?=Rv3ulv=~3%>82`{{0yD-pbyXxZEnqf=ZqX81FZBw&+!A94Up|cwq3Pz zA!~k9*OT{{=J@0Ha!?g1{U$cpZnZfxVG_G(AU5zI9f@&(n16Qj_{{Y0CJngP9z{iV zIt5;NZ`+;r**FLQ;+;}Z7v0dJndcsef%q_ta_vcWW z@=rK09pcx{(LGa@+X6aAxPMqnUH1njjSsnGLp;Hgmx3)=PJOjW21ucYCFfq6SCxat z6eo|+xo{s2P@0#QHO_g_Tc&~+Y{nS3MbU|_0uVNY;_>BPeW1*R8jUfFa#g{?m^(*#z!QvK65;kkM;KX{ha4}w38F=O_$#Sr(J5V-Xw^RtM)K0VCzyp3^dcKXEycwJtO?GQeX&2iA4y;(N-`@ zyXeoJ#mC85{km`aY)k6Y$@BDo_TC@u+Ux34|20$gzkb`&eE09$Pc*-4kNh6*w?_Xq z#{Ig_eY%tUE!}-u-TPEE^()^~pY5_gZ87_4#)kTrx%1V3ZiRl@-}dSk?a{B>ZhyC1 zKWZ=gRR1-%_Sg5+_xot@_$b=4H~$VV6}#v=jMcHr#Be6+rTY6(rC^y%rs7-9+WeWo zh$Q2Tk0NFJ4(HvTLKSLkLzLroIdr+3DlKW1?fwx7TRK;`xyPDBoZ@w-ZVI~b6;Qk0 z=bH%-qc7OF3r&*~$qVPPN9}TZJ@!1(N6Mdup3n`^&JdYl_((ScLj`rqRj~uOJ z^%1q;S0x93Zz1V?>Hc8W-Rhres^^SjrIQ(y@vl`n=F7JpEpeD;Kn8+a6e_S4#uX%cM=5v6?4l!QIwYrMfA@s%Rgw1Bsn@V7Q-+%ID{VFfHb;p zDilG*aPNy|E7~?+2BD}s(YH0n37ml@zbsF;KPfXM3^)Ca{M%zdcg(<#zM5&{C{$il z)hy{hbc_+?H>ru^r6A)mpif$w7oiXkTgueg%uCJsUm~=ef8mnbz{Gu%obFC^G=3F8 zwE;p7Puee;R&V<^8k{=sVDlerAXBUk813fga_l{TQldhh*FneB&o<1e=Bm<<@Cv=E zZF8r{qGA19*$m zgk1BY5yvHsLqIzXJJ)$3i534So0o=%)^3elCQ7dcykW{m8IUUl6(19s zC5bAK{>CkzxbWyJbL2N9e2}OX9@Rb6T;QsIIccTV3LBOk8_7haK2V?NutaDbQPUub z!%?2}&El4`E-(KJ-2FS^CkQaG9UYW`rYV0la^&zzFZUIX4$kkg0JyH&IKJOB9AUI& z1x*deYubP1M{w3bs#9r_?;j?bGz_%!)T;wEMP#-dQy?#8sJLHc3D88@W1z-EDNsN!9@bU{#PVG-)3Fa&)6TVf&$g1Q-M2o@w-C@ZYkaPB2$ z*vJG{@++jYroagXHo&YG~Mb)M5(9&Kfw`L<>``p zueM`s+SX(TcmxdSarb{+D%D3TK&d3!)Pf40cNt&>#0}953B?xTi7s?a*nw8AmfT$4 z?Asv7!V)NIi3aZaKro8K&7Ao;!nbA5S9F2%IFU=b5SRk9)A@@ zvSY2m#z1eW#(JE`0ubDx{?_q}gR{9SVP?!ko!djHDEor5mzpEANH@JqwHvdjgu6d@8-t${Lq)r*0?0ef?MmO|AqxZoKaxR9HX zsn%KZ>_u^>3P%`&^cMGK@Z=mDA!a@5r9qp2+dqx+WKW&Vjf_GMn*1ecH=^=nlRJ6VZo-Vm&tT@AX#{@x zpmTNFkZH#i^TtmfI^%`hn6dEgXKAR91EY3{X3#TPZHSJOZf?}nkyRBcdTi9Xh8`ijHcTb)*lgSrW_r49!wR+RKFuQ9`CwLI=Q*DS@3(F4n8GC&T3WPQ`BrN>A zKfrq4KlQ^CV2PvL0$Tfzy{*3n&);>!fPu9bu-R`ZL~uI9y&Lmxdm9MP!4Ly~GFp!B zBo#1C?#i%o&JFAc2?G1)J8>rtAju@9pAtTtIzk6}>9-PL2^h7pg+}vI8@+&{KQ6_D z-`OB3Q5n0*m}vBWJGMBF3-2F{gB&Bl27Ae1SgA%2R!N{DfW0Ggt{$K54+X;%xacG| zur(`!DOTX1`Mm@J^&A}X45b|k8rN2to_d=PYjXS}0wlRcqM5)iZ*G$?miVzMOsCMM zjMsZ@3SoHhS8l!6Dx+CxkpObh3i_>JImHz~SMd8pP+b?BP}G`eYm(yv;cvTss@mdA zl@J#>0IEx;h|FAkX8<%Frjgls)ewpsLDG>>(W>9GM@_!TP>gGROvWwjVk@KBN&5)#<@MM; zH&0D7j~$?d9N5`;!aw19lb{asXpSTw);v$5_8Sbkc+;0Q-#Ji> zHU$E_=w-?+G|@-eiab$H^j#+Z8J5#gKD?e>S_* zT}f2Ld&Tv$5`Gx~Mmx&h=AQ$}PTu%%qMrOi*eo(XfG4l~nWgFTk7!#%AnZ1j?ygi6J!TAQJ zv>iM2yY~Nnbcbw0u??o|!x5ry!Pl?%o{YXcDa@utU> zlAlW1YXXsu7F6VPy$LXan^%{>6t_n8#ui5Z1ZZtH57Ue9ywkU>X|$p0LcyJV_T6{< zf1sdUOXN(w3jSG80U(gr-jl$dNAl)hVw=@An?{L;TknmGw#nzfzjZMZ#U)3&@C%5k zZ~cJq(m%`pMaAc6m@O(J*|(f_t>9gAmUtNT_*~WT+7W|=S&en9cW(2<;SHdYCquu( zEg&g_J(+5rBuBbHca5>ns0^-cxyF^>;|;do3$G zfprRLlBQ|aJU_%i*kmhkpma}ip&NMneB>8V$ 
zbOJVbvL-rK20qvd(zG0pSQjP=^ireJ`|$QZeieP}F1o|6sRg7|+i7dPYCC%W1J-C< zXM6J@8=FXPnkoQ`&>@5Re(h*`TrXDAFM+0U)SZ46f+lzWa(cU{rs+_IYP53#krbBi zUvD;|8Cpw)=Rm))FyDyqSrgP#Hj@Zy^2&o4WsLHW4+p1#YLtr6y1D8?t6uc3)Jt@? z9xGr0ORoPKN>@WIJSJh0lG2c5AIrJ*h$^acVA?+X{rfO>YJH3P=j(ntIisl4VGX11 zc?SdS-ylD_wfFYNy1#RnpeMb~j@;k^Tad&hJiu(IDVvQ%wmC&Sh+N~0NPojm2SBrV zOZ>5ZjTiDY`V?wKv>nUe&}BHTv4i_WTH-C?y(HyJ9ilmf1MnTRQorFDhvsA6AXZfy z0s*LRM^xx2CaOjx_7u#h++!T{ZRGVZ?$;)&UqOPp(f@wx;+K9^KP%FO-&y*m-e+uJ zj)?fGr&+E-$ZUEemX0uj#0yM-2%q{uL`HODD%wexQTZ#g-)G>Rr!t_W$<;sU=s1I2 zHqc|22@@=b9hJQcK`_X{1-BeHJ;|Be+Pjn-U1dRC*GpVl{2%JJd?%^cS-}hM7OZ&? z_65`1r7Nz0wr&CR3duyp?}sEmVCQOXHVe<@ZnANNh8B5s3<9fI6&>%pf|K)n&-^qi z^MU|yl}kd%LzV~W(Ory~@5&xurPHNxCI4n~E_X{R4&Eip9)b#f?_ zcFRd-G)iz_DSDWsv6!d5{1Grbp_7S$owVJJ>V;F3s%6LC!XCPC?c$C_ajVT{m!^{x z@;Jz=U5{OKABQ-r^vInkm2E--x8d}z&WTlILjC@5Fz5|~4-d<^=jYAjtmy$IvVCd7!f zWOA6V*j8n__)Wqkeg^BX1VbV2luILHvm94A6Hc3fV4pRdY08C7@#ogt7J43}R--Sk z*ev=RRbJZHD34^hDPHbOS#8NprIR#(F;!TNf_;a})RX0Prv3P(dd5M0zLe{rB7K`k zr*PrMJ=Jjbx4}xVyG|}r(2csBYsr-fJ}$nQ3YN9u~XU>+dW}tPpT$Xhn77H)Rb0pI$c6KA-uN zK4cRbB^diriC@_~^Qn`dLV;mzg67qwF!U!Vtl>wa!b6~=fN2F7R$%^{`G_UhotmGX zFq7lv*7C>Q>EE+xlX8iLgHDu09LsqQR%l2C$yI2S_1?E)!Yf`ek!eJedCs0btQs~^zo4KdeD=!BgNNUKJ-3xB@-)ky_yA3lYS87Z3(uU6%)b*-=MO zpexHL%W?wF4B@t=DTQ658v?r?^D@)?gHi;K8|=8_y)ZnPqxeX)zDf{_?--RcEuwPt zlo6_!M1plG#MdGLJ*tX%)|y_++N)Im3E8iKiP%*2BU0li{&$O3NsIvf%93M6XFdFl z<^bMSMROnbBT!TAKn7IMiu5t11_Fn-{eF^M4PvTc6I_)U|9y@V5Cqaal|=9_efdPb zrkCS5_zdU&J{UQRrZs}k9mQDLC79~}OF9o3q-SLu z{PUE&&Y+g1yCve>DL71eogD6|y$E4g*RO4_pIxIy>N3kGNZ@$Wr5p~OZh3M$)oP< z^Wd$Dg#)ZnB`1$6%-C~cb399OgqwpH$;~xUqms_ktmzL-802?$$3Y^-v>$lh+?LN> zt4`f_8a|e+U{-p6>tRrFREZb*oq5nC$1OV&USjm0SHf@4{*q0)hL@>SgE<}g7@yGY zcx`y3KepS;Sf2k9PKJQEZI*`tH4ol&lS@)w)J-%J#}4oJ?}W4j zmH3`S506lHaBnW7=1x88Bq#OtkaGc;c3R;jNN8d{n-z_*9Zl6TMs=noHqxjH0K96Y zd}1~?RpRIkk1Sm!GWB%x8Wv1V2x+z_DvnrM5gKJXE#S z^8?OYRI?#Ccl{^}_SE_bm;DU;95TH`-Dp)H@+(6q|UO{8ZFZ!Z2vzgkWw7)ixwEuYh=(Z3sW(kVv| z?HP|=skwC9q7hKsG+uzuy{*9LI9#~0+PRM>?l6j;HTKe(yN-o#aqG(3$N>u3vUJIO zcGvWlb1e2RLq5^4E8qEkb1UdW(sAT@A?XJZQxI{pa}wwtpx0b2c4ez>foHQs!sMqG zsaNq#TG!5%sk_%4&try#2#v&v3xvTzAIJHusaQ$4^qMegm+&q9;1}lc`#l9Riu9a1 zf`sJl)SErWEqi!l>v236i{_;tx`u?#Gkl_|;_4A-W_TE;YlDFyBR3#V|3SrJvEMsP z`{Qq#z=n`0&ObVboSxvJIIO1=Ij*z3#8?IyGEj(5p2SoPSt@#kVN`Eca&@tx(hTQs zzbIZn>NCdrqfH4#%+_k?0s=ceU@4NcFE?wL(i^{dKCjW-f-iUNW(eZ~?CPq?s?_30 zk%{(>{0$ENuD)CCJhRP#d;@dkOXytHa-p@jXoM*D-6Cq4Ti@>6a&dfp5glNajh zGF4t*gzVon==Yxb$vMToE0YvonyOt;H^>m40JaHu(jYRX)Xp_CON~6>F3$tuZMb)E zGPukLUoT#FK443(n}OA%2E)p(Zh}P=jGmoSB@6?|WynFjomhL7JXK~~U#Fww4*oi| zb4BiY6M**GefG&QH+h_8DE9IjR4$WRMK>4=Xiqs|!kq{3)|25#200ky4r8FsB|u$u&QS-9;4+0;g-7f1q)v zfzY^ZMze}}c|xA4Tl|7fhFOvESxT3Q^I!DxCR`UbLd_Dlnm`sq;JreSG4_!8M-nk} z!@X>pT@7MyKT>m>e8&7(k5HhwO3j#maupur&tVp;azsMb4NKM9C#?Q|6?vgd#$7b7Q_zLj?Sf&XOtpW7oO%2XO_qX`= zG8-BHFGLZ_Bg=y#FVakE848|$bqzoY7G;=|f*)oHG|E7RS8vC|MQqxwLiGPd=FRLp zjx4N`R;=*Td3elD@9=;nQzM3Y<`_XE-c2_$nn_*6!b{t?%kqNmBHTRn@uUnk;@Sny z!l9ZGEght*$Ja0Mn8`bER;UZZy_V@YyKeuC)6c9y?Y3~3#BgIsA z-GTdk=#@G3#hi!ozui+P!ey{aW9l1;NUrAZ)Kkw3vT^?f*;;6M5QMs}T<;N~oYeci z>nC8tmEDEN3@i+jr!e+Bb$qceFuDE}HJ=UPH|PUelHdL5=Ia2af(CaPGb&tKz}yWeOG!@;Qy zcFz`x8<}mw17xQRl|)b}emyB|QtOaG!KFt2m&zelDxP_NzY%L6U*s2WIum`uK8EHPlAqqJ~;N?g_jb>vIcMJvv3wTh;}z4!3QMkacv{))b^;pqu%o zqynho_@{2?l+qJiu9J}bsp`rlO$m2iM0byIoXCa)(@eszomyx{g@N?|UIb2NUZjpU zCKfFFE-kW*5!l|&w;W>-$%`S8Rh_ILqAV4YSB{W{Gc{}cx|H8b)5HUKB0o4Ar%W-X zA*+AooJSBf&O;e7gQgg?T!b9cWK2sAL~%paii)O9io%+2qry%74Xrc%V*C=jrx8_T zx4D3~J^WHw>dFN(NSd$5cay-%44@8$^zdT4PLDuKqADi;Jm#{bn~B^6lEl81Swlh! 
z$Gk8|CIMFjsQYC$1@@%gm;9!X-KP-QlaK8P^_qzI1-*AUf2x1g6u}K^!z{iPDXP1z zDL$7-Ng75k6YixL2&kMSwVdyC4IVc;vZN9L?S|9peNnJ&K)h)ld8Q0B`UYWQNof>*h6_TuM?pNgKA>ENUb9~hpDTG%5qE9UxLJDSmZ+-+bk zA}~fywJQC$8~cWut}~ldII`hA1W~HQ#L(SK)TUX0M~zk+a}a8~x>YX9W8&wkq@M){ z4jy;;+O)4rCIL00M@z=dN#VS{$h*-(BT{p~n__@@i`eLnHJRw$(f;%=u}b4Oe)VXTF02J<`MGVC#A{S!{1U6@LqFGZt>>8Qj>%wJz4uVVP#>&c zKPk*#N6Ti4wT@&w5=G<^7+g)eqEMaoGw8XppT&7IGs#5LSR zoS`YG+b*(gE$oC?l z#`)1bZ8_Fl-}uO7xm2N&LVSrr=$xTv#C_t4WRqnZS7(-6qcArz`1lOfqD-J5OrORq zR(@YZZ(C9~DjcT3EvLOgli?T%x!m&Fc#(n6>NTMGOSF`p!=1c4<}g9A!fDZQJRkj3)*Jsm~W4W+JHH9}TM= zZ8VszICh1B{Gt~qcOMg;s>12OWQphXr_VJK1P9qg2qX#to=N{vVof}m-!Gt@8@g%E zSzaoAmrX5_^?8n?0j{yR!Vp#MPx4c4eQwj#bu5-c^y^XUK-hHyGmy9FPN`bSzjx3; zr2ZMmQJ55tr0rNY$k{`##8p@9s4;byp24r5o}K5(yVChP>U8M&`s=R9CsSY>F(+?K zD4w#>qn@w&;YyJE^n_-J@u?otjgqB2p0vu89`Yh=3oR(y1_#>S`Q`%0M{1QPAjlarUzar6v zM^vR9r?Qb24PGK@Elf#|MNCaea?tkV!fJ**zNTO#lD@ literal 0 HcmV?d00001 diff --git a/mecodesktop_mac.spec b/mecodesktop_mac.spec index d5a02f9..6124016 100644 --- a/mecodesktop_mac.spec +++ b/mecodesktop_mac.spec @@ -1,5 +1,5 @@ # -*- mode: python -*- -a = Analysis(['desktop.py'], +a = Analysis(['mecodesktop.py'], pathex=['/Users/jkitzes/Projects/macroeco'], hiddenimports=['scipy.special._ufuncs_cxx'], hookspath=None, @@ -8,7 +8,7 @@ pyz = PYZ(a.pure) exe = EXE(pyz, a.scripts, exclude_binaries=True, - name='desktop', + name='mecodesktop', debug=False, strip=None, upx=True, @@ -21,8 +21,8 @@ a.binaries + [('libwx_osx_cocoau-3.0.0.0.0.dylib', a.datas, strip=None, upx=True, - name='desktop') + name='mecodesktop') app = BUNDLE(coll, - name='desktop.app', - icon=None) + name='Macroeco Desktop.app', + icon='icon.icns') From ad6e1fae1e36e7e1b4e04f7126ff0241ddb1c5db Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 22 Apr 2014 14:06:28 -0700 Subject: [PATCH 281/343] Add additional info to ANBO metadata --- demo/ANBO.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/demo/ANBO.txt b/demo/ANBO.txt index cc09bbb..a60db82 100644 --- a/demo/ANBO.txt +++ b/demo/ANBO.txt @@ -1,26 +1,32 @@ [Description] name = Anzo Borrego author = Mary Ellen Harte and John Harte -description = Vegetation census conducted at Anza-Borrego Desert State Park. Site in Indian Valley at N 32' 52.091", W 116' 14.447". Elevation 1195 feet. +description = Vegetation census conducted at Anza-Borrego Desert State Park. Site in Indian Valley at N 32' 52.091", W 116' 14.447". Elevation 1195 feet. Census was conducted on a 4 m x 4 m grid, with 16 grid cells each 1 m2 in area. 
citation = Unpublished datapath = ANBO.csv cols = spp_col:spp [year] +description = Year of census [cell] +description = Unique cell identifier, from 0 to 15 (total of 16 cells) [row] +description = Row of cell in gridded plot min = 0 max = 3 step = 1 [column] +description = Column of cell in gridded plot min = 0 max = 3 step = 1 [spp] +description = Name of species [count] +description = Number of individuals of a species in a cell From fd29b15402fdad6a2e388c497b93958d1056626d Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Tue, 22 Apr 2014 14:07:07 -0700 Subject: [PATCH 282/343] Expand ~ in metadata_path --- macroeco/empirical/_empirical.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py index a226188..afe968b 100644 --- a/macroeco/empirical/_empirical.py +++ b/macroeco/empirical/_empirical.py @@ -93,8 +93,7 @@ class Patch(object): table : dataframe Table of census data recorded in patch meta : ConfigParser obj - Object similar to dict describing data table, loaded from metadata - file at metadata_path and processed by subset + Dict-like metadata, loaded from metadata_path and processed by subset subset : str Subset string passed as parameter @@ -134,7 +133,7 @@ def __init__(self, metadata_path, subset=''): self.table = None else: self.meta = ConfigParser() - self.meta.read(metadata_path) + self.meta.read(os.path.expanduser(metadata_path)) self.subset = subset self.table = self._load_table(metadata_path, self.meta['Description']['datapath']) @@ -158,7 +157,7 @@ def _load_table(self, metadata_path, data_path): """ - metadata_dir = os.path.dirname(metadata_path) + metadata_dir = os.path.dirname(os.path.expanduser(metadata_path)) data_path = os.path.normpath(os.path.join(metadata_dir, data_path)) extension = data_path.split('.')[-1] From 1a01175188958f8056586345b7d05b42e66df7f6 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 23 Apr 2014 09:25:29 -0700 Subject: [PATCH 283/343] Fix typos in README --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index da670a7..51fc6af 100644 --- a/README.rst +++ b/README.rst @@ -3,6 +3,6 @@ Macroeco: Ecological pattern analysis in Python Macroeco is a Python package that provides a comprehensive set of functions for analyzing empirical patterns in ecological data, predicting patterns from theory, and comparing empirical results to theory. Many major macroecological patterns can be analyzed using this package, including the species abundance distribution, the species and endemics area relationships, several measures of beta diversity, and many others. -Extensive documentation for macroeco, including detailed installation instructions, tutorials, and a reference guide, is available at http://macroeco.org. The most recent stable version of the macroeco package can be installed from PyPi (``pip install macroeco``). For users who do not program in Python, a standalone application called Macroeco Desktop, which provides most of the functionality of macroeco through a simple interface that requires no programming, is also available. +Extensive documentation for macroeco, including detailed installation instructions, tutorials, and a reference guide, is available at http://macroeco.org. The most recent stable version of the macroeco package can be installed from PyPI (``pip install macroeco``). 
For users who do not program in Python, a standalone application called Macroeco Desktop, which provides most of the functionality of macroeco through a simple interface that requires no programming, is also available. -The current version of macroeco was developed at the University of California, Berkeley by Justin Kitzes and Mark Wilber and is maintained by Justin Kitzes. Other contributors include Chloe Lewis and Ethan White. The development of macroeco has been supported by the National Science Foundataion, the Gordon and Betty Moore Foundation, and the Berkeley Institute for Global Change Biology. +The current version of macroeco was developed at the University of California, Berkeley by Justin Kitzes and Mark Wilber and is maintained by Justin Kitzes. Other contributors include Chloe Lewis and Ethan White. The development of macroeco has been supported by the National Science Foundation, the Gordon and Betty Moore Foundation, and the Berkeley Institute for Global Change Biology. From 57f23e7a995b9f24ff4e834709f905d6690f9529 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 23 Apr 2014 09:26:15 -0700 Subject: [PATCH 284/343] Comment out version from main logging Import was causing pyinstaller to fail --- macroeco/main/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index b28562a..3a7d1d9 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -17,7 +17,7 @@ import matplotlib as mpl import matplotlib.pyplot as plt -from .. __init__ import __version__ +#from .. __init__ import __version__ from .. import empirical as emp from .. import models as mod from .. import compare as comp @@ -46,7 +46,7 @@ def main(param_path='parameters.txt'): # Start logging log = misc.setup_log(base_options['results_dir']) - log.info('Running macroeco v%s' % __version__) + log.info('Running macroeco') # v%s' % __version__) log.info('Parameters file at %s' % os.path.abspath(param_path)) log.info('Starting analysis') From 2e9ba6daf7a701e8e5f8378d8fc3db6e14140de2 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 23 Apr 2014 09:27:01 -0700 Subject: [PATCH 285/343] Fix imports and update copyright --- doc/conf.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 2ab0417..1f95c5f 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -17,10 +17,9 @@ import sys import os -from macroeco import __version__ sys.path.insert(0, os.path.abspath('..')) -#sys.path.insert(0, os.path.abspath('_ext/numpydoc')) +from macroeco import __version__ # -- General configuration ----------------------------------------------------- @@ -52,7 +51,7 @@ # General information about the project. project = u'macroeco' -# copyright = u'2013-2014, Justin Kitzes and Mark Wilber' +copyright = u'Justin Kitzes and Mark Wilber' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the From 6a278dac7573847736edf6f493fbfd85a6b7e094 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 23 Apr 2014 10:42:55 -0700 Subject: [PATCH 286/343] Updates to main index page of docs, make automodule --- doc/index.rst | 27 +-------------------------- macroeco/__init__.py | 44 +++++++++++++++----------------------------- 2 files changed, 16 insertions(+), 55 deletions(-) diff --git a/doc/index.rst b/doc/index.rst index 47f75fe..8c5b6aa 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,26 +1 @@ -.. 
macroeco documentation master file, created by - sphinx-quickstart on Sun Feb 16 21:19:54 2014. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Macroeco: Ecological pattern analysis in Python -=============================================== - -Welcome to macroeco. - -.. toctree:: - :maxdepth: 2 - - empirical - models - compare - misc - main - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`search` - +.. automodule:: macroeco diff --git a/macroeco/__init__.py b/macroeco/__init__.py index f374414..9cf8579 100644 --- a/macroeco/__init__.py +++ b/macroeco/__init__.py @@ -4,39 +4,25 @@ =============================================== Macroeco provides a comprehensive set of functions for analyzing empirical -patterns in data, predicting patterns using theory and models, and comparing -empirical results to theory. Many major macroecological patterns can be -analyzed using this package, including the species abundance distribution, the -species and endemics area relationships, several measures of beta diversity, -and many others. +patterns in ecological data, predicting patterns using theory and models, and +comparing empirical patterns to theory. Many major macroecological patterns +can be analyzed using this package, including the species abundance +distribution, the species and endemics area relationships, several measures of +beta diversity, and many others. -Extensive documentation for macroeco, including tutorials and a reference -guide, are available at http://macroeco.org. +Macroeco can be used either as a scientific python Package or through a high- +level interface called MacroecoDesktop. Users new to Macroeco should begin by +reviewing all of the tutorials found below. Experienced Python programmers who +wish to use the ``macroeco`` Python package can ``pip install macroeco`` and +refer to the Using macroeco tutorial and the Reference guide. -The package is organized into five submodules. +.. toctree:: + :maxdepth: 2 -Empirical provides a Patch class for reading data and metadata from an -empirical census and functions that calculate empirical macroecological metrics -based on that data. + tutorials + reference + about -Models provides a set of distributions and curves that have been proposed by -basic theory to describe macroecological metrics. - -Compare provides functions for comparing the empirical and modeled results. - -Misc provides a set of miscellanous functions, including several that aid in -formatting census data for use by functions in the empirical module. - -Main provides a programmatic interface to this package, known as Macroeco -Desktop, that allows a user to specify all of the parameters for an analysis in -a single parameters file, which is then executed, and results saved, with no -additional intervention needed. - -Macroeco was developed at the University of California, Berkeley, by Justin -Kitzes and Mark Wilber. Additional contributors include Chloe Lewis and Ethan -White. The development of macroeco has been supported by the National Science -Foundataion, the Moore Foundation, and the Berkeley Institute for Global Change -Biology. 
""" From 09848996af09bae290bc10687ed71ed4455c1c25 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 23 Apr 2014 10:43:16 -0700 Subject: [PATCH 287/343] Enable save button after new, in addition to after open --- mecodesktop.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mecodesktop.py b/mecodesktop.py index 1692d14..287da55 100755 --- a/mecodesktop.py +++ b/mecodesktop.py @@ -146,6 +146,7 @@ def OnNew(self,e): if self.askUserForFilename(style=wx.SAVE, **self.defaultFileDialogOptions()): self.OnSave(e, new_file=True) + self.save_button.Enable(True) def OnSave(self, event, new_file=False): f = open(os.path.join(self.dirname, self.filename), 'w') From 7da56ee293a7cb98fe5eb5ada8cf15b34c7177b1 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 23 Apr 2014 16:03:51 -0700 Subject: [PATCH 288/343] Complete cols variable in ANBO metadata --- demo/ANBO.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/ANBO.txt b/demo/ANBO.txt index a60db82..ef1f5f3 100644 --- a/demo/ANBO.txt +++ b/demo/ANBO.txt @@ -5,7 +5,7 @@ description = Vegetation census conducted at Anza-Borrego Desert State Park. Sit citation = Unpublished datapath = ANBO.csv -cols = spp_col:spp +cols = spp_col:spp; count_col: count; x_col: row; y_col: column [year] description = Year of census From 75978a8190ddb1f95e9867881717d39b949685a1 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Wed, 23 Apr 2014 16:13:10 -0700 Subject: [PATCH 289/343] Complete update/revision of doc --- doc/about.rst | 9 ++ doc/reference.rst | 18 +++ doc/tutorial_getting_started.rst | 120 +++++++++++++++++++ doc/tutorial_macroeco.rst | 9 ++ doc/tutorial_macroeco_desktop.rst | 185 ++++++++++++++++++++++++++++++ doc/tutorial_own_data.rst | 88 ++++++++++++++ doc/tutorial_recipes.rst | 108 +++++++++++++++++ doc/tutorial_with_r.rst | 43 +++++++ doc/tutorials.rst | 13 +++ macroeco/__init__.py | 3 +- macroeco/compare/__init__.py | 3 - macroeco/empirical/__init__.py | 12 +- macroeco/empirical/_empirical.py | 35 +++--- macroeco/main/__init__.py | 4 +- macroeco/main/main.py | 4 + macroeco/misc/__init__.py | 17 ++- macroeco/models/__init__.py | 54 ++++++--- macroeco/models/_curves.py | 16 ++- 18 files changed, 690 insertions(+), 51 deletions(-) create mode 100644 doc/about.rst create mode 100644 doc/reference.rst create mode 100644 doc/tutorial_getting_started.rst create mode 100644 doc/tutorial_macroeco.rst create mode 100644 doc/tutorial_macroeco_desktop.rst create mode 100644 doc/tutorial_own_data.rst create mode 100644 doc/tutorial_recipes.rst create mode 100644 doc/tutorial_with_r.rst create mode 100644 doc/tutorials.rst diff --git a/doc/about.rst b/doc/about.rst new file mode 100644 index 0000000..a78e349 --- /dev/null +++ b/doc/about.rst @@ -0,0 +1,9 @@ +============== +About Macroeco +============== + +The current version of Macroeco was developed at the University of California, Berkeley by Justin Kitzes and Mark Wilber and is maintained by Justin Kitzes. Other contributors to current and previous versions include Chloe Lewis and Ethan White. + +Comments, bugs, and feature requests can be submitted to the developers by creating a `new issue `_ in the Macroeco GitHub repo. If you are submitting a bug, please include as much information as possible so that we can reproduce it. + +The development of macroeco has been supported by the National Science Foundation, the Gordon and Betty Moore Foundation, and the Berkeley Institute for Global Change Biology. 
\ No newline at end of file diff --git a/doc/reference.rst b/doc/reference.rst new file mode 100644 index 0000000..b247c1d --- /dev/null +++ b/doc/reference.rst @@ -0,0 +1,18 @@ +========= +Reference +========= + +The ``macroeco`` Python package is organized into five subpackages. + +The three core packages are empirical, models, and compare, which contain functions for analyzing empirical data, classes and objects for theoretical models, and functions for comparing the fits of models and data. + +The main subpackage contains the code for MacroecoDesktop, which allows a allows a user to specify all of the parameters for an analysis in a text file. The misc subpackage provides a set of miscellaneous functions. + +.. toctree:: + :maxdepth: 1 + + empirical + models + compare + misc + main diff --git a/doc/tutorial_getting_started.rst b/doc/tutorial_getting_started.rst new file mode 100644 index 0000000..7d91c28 --- /dev/null +++ b/doc/tutorial_getting_started.rst @@ -0,0 +1,120 @@ +=============== +Getting Started +=============== + +This tutorial provides an introduction to the basic use of Macroeco for ecological pattern analysis. + +The functionality of the software Macroeco can be accessed through two interfaces: the low-level Python package ``macroeco`` or the high-level MacroecoDesktop interface. + +The Python package ``macroeco`` is a scientific Python package that can be imported into a user's custom scripts and modules along with other scientific packages such as ``scipy`` or ``pandas``. + +The MacroecoDesktop interface is designed for users who wish to use the functionality of Macroeco but are not Python programmers. Instead of writing Python code, users of MacroecoDesktop create simple text files, known as parameters files, that describe an analysis and the type of desired output. MacroecoDesktop provides both a window-based graphical interface and a "headless" command line mode - the latter of these allows MacroecoDesktop to be called from other computing environments such as R. + + +.. _installation: + +Installation +============ + +For users with an existing scientific Python environment, the latest stable version of both ``macroeco`` and the MacroecoDesktop interface can be installed with ``pip install macroeco``. Several package dependencies may also be installed by this command. The latest development version of Macroeco can be found in the "develop" branch of the `Macroeco GitHub repo `_. + +Mac OS X users who wish only to use MacroecoDesktop can instead download the MacroecoDesktop application from this link. After unzipping, drag the MacroecoDesktop application into the Applications folder. + +Windows and Linux users who wish to use MacroecoDesktop will need to set up a scientific Python environment. The developers recommend the free `Continuum Anaconda `_ scientific Python installation for new users. After downloading and installing Anaconda, run the command ``pip install macroeco`` from a Terminal window. + +The remainder of this tutorial uses demo data from a vegetation census in Anza-Borrego Desert State Park in southern California. This demo data can be downloaded at this link. The file ANBO.csv contains the census data and the file ANBO.txt contains metadata describing the data table. This data may be freely shared and used for analysis so long as credit is given to the authors. + +.. 
_first-steps-macroeco: First steps: ``macroeco`` ========================= Users of MacroecoDesktop should skip this section and proceed below to :ref:`first-steps-macroeco-desktop`. The ``macroeco`` package contains three main subpackages of interest: * Empirical - loads data tables and performs empirical analysis of macroecological metrics, such as the species abundance distribution and species area relationship * Models - provides objects for distributions and curves predicted by macroecological theory, such as the logseries distributions and power law function * Compare - provides utility functions for comparing the fit of models to empirical metrics, such as AIC weights and r-squared statistics A common workflow involves loading data, calculating an empirical metric, fitting one or more models to the empirical metric, and evaluating the fit of the model to the metric. The following example calculates a simple species abundance distribution for the demo data. First, the ``Patch`` class from the empirical subpackage is used to create a Patch object that holds the data table and a metadata dictionary describing the data. ``Patch`` requires a path, absolute or relative, to a metadata file as a mandatory argument (see :ref:`own-data` for information on creating a metadata file for a new data set). >>> import macroeco as meco >>> pat = meco.empirical.Patch('~/Desktop/demo/ANBO.txt') The empirical subpackage contains a number of functions that operate on patch objects and return macroecological metrics. Here we'll use the function ``sad`` to calculate a species abundance distribution. The first argument is the patch object to use, the second is a string specifying which column has the species names (spp_col) and which, if any, has a count of individuals at a particular location (count_col), and the third is a string specifying how to split the data (see the Reference guide for the functions in the empirical module for more information on input arguments). >>> sad = meco.empirical.sad(pat, 'spp_col:spp; count_col:count', '') All functions for macroecological metrics return their results as a list of tuples. Each tuple has two elements: a string describing how the data were split and a result table with a column ``y`` (for univariate distributions like the species abundance distribution) or columns ``y`` and ``x`` (for curves such as a species area relationship) giving the results of the analysis. Since the data were not split in this example, the list has only one tuple. Any number of distributions from the models subpackage can be fit to the resulting empirical metric. The code below fits the two parameters of the upper truncated logseries distribution and uses the function ``AIC`` from the compare subpackage to calculate the AIC for this distribution and data. >>> p, b = meco.models.logser_uptrunc.fit_mle(sad[0][1]['y']) >>> p, b (0.9985394369365049, 2445.0) >>> meco.compare.AIC(sad[0][1]['y'], meco.models.logser_uptrunc(p, b)) 208.61902087378027 The two fitted parameters can be used to generate a rank abundance distribution of the same length as the empirical data. The empirical and predicted rank curves are plotted. >>> import numpy as np >>> import matplotlib.pyplot as plt >>> plt.semilogy(meco.models.logser_uptrunc.rank(len(sad[0][1]),p,b)[::-1]) >>> plt.semilogy(np.sort(sad[0][1]['y'])[::-1]) >>> plt.show() For information on performing more complex analyses using ``macroeco``, see :ref:`using-macroeco`.
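The same workflow extends to comparing several candidate models for one empirical metric. The short sketch below is not part of the original example: it fits a lognormal distribution to the same abundance vector and computes its AIC so that it can be compared with the value obtained above for the upper truncated logseries (a lower AIC indicates a better relative fit). It assumes that ``lognorm.fit_mle`` returns the ``mu`` and ``sigma`` shape parameters in that order, so check the reference guide for the exact signature.

>>> abund = sad[0][1]['y']  # empirical abundance vector from the sad result
>>> mu, sigma = meco.models.lognorm.fit_mle(abund)  # fit lognormal shape parameters
>>> meco.compare.AIC(abund, meco.models.lognorm(mu, sigma))  # AIC for the fitted lognormal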
+ + +.. _first-steps-macroeco-desktop: + +First steps: MacroecoDesktop +============================= + +This section describes the MacroecoDesktop interface. Mac OS X users who have downloaded the MacroecoDesktop standalone application will have a choice between accessing MacroecoDesktop through a graphical program or at the command line. Other users will only have access to MacroecoDesktop through the command line interface. Both methods of interaction are described below. + +The purpose of MacroecoDesktop is to provide non-programmers an interface for accessing the functionality of Macroeco without the need to write Python code. Instead, the user creates a text file, called a parameters file, that contains the information and instructions needed by MacroecoDesktop to execute an analysis. + +This section gives a very brief overview of how to create a simple parameter file and use it to analyze a species abundance distribution (the analysis and output are identical to that shown above in :ref:`first-steps-macroeco`). More information on the structure of parameter files and how to customize them can be found in the tutorial XXXX. + +To create a simple parameter file, open a text editor of your choice. Windows users can use Notepad, which can be accessed through the Start Menu. Mac users can use the program TextEdit, which is located in Utilities folder inside of the Applications folder. + +IMPORTANT: Mac users who use TextEdit should open the program and immediately go to the Format menu and select the option Make Plain Text. This will need to be done every time TextEdit is used to create a new document. Alternatively, you might wish to download a better text editor such as the free program `TextWrangler `_. + +To get started, type the following text into your text editor. Save this file with the name "new_parameters.txt" in the demo directory containing the ANBO.txt and ANBO.csv files. :: + + [SAD-ANBO] + + analysis = sad + + metadata = ANBO.txt + + models = logser_uptrunc; lognorm + log_y = True + + +A single parameter file can contain multiple "runs", each of which is denoted by the name of the run written in brackets (this run is titled "SAD ANBO", as it will analyze the species abundance distribution for the Anza-Borrego demo data). + +Conceptually, the information required for a single run can be broken down into three parts. The first part tells MacroecoDesktop the type of analysis that's desired, in this case a species abundance distribution (any function contained in the empirical or models subpackage of ``macroeco`` can be listed here as an analysis). + +The second part contains the information that MacroecoDesktop needs to complete the core analysis. To generate an empirical species abundance distribution, the necessary inputs are the location of a metadata file that both points to a data table and provides information about the data and a variable called "cols" that tells MacroecoDesktop which column in the data table represents the name of the species and which (if any) gives the count of individuals at a location. + +The third part describes what, if any, theoretical models should be compared to the core empirical result and what options should be used for the comparison. The models variable gives a list of distribution names to compare to the empirical data. An additional variable log_y specifies that the y-axis of output graphs should be log transformed. 
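Because each run is self-contained, a second run can be added to the same parameters file and both analyses will be carried out together. The block below is an illustrative sketch of such a file, not part of the original example: the run name [SAR-ANBO] and the particular divisions chosen for the species area relationship are hypothetical choices, and the ``cols`` information is assumed to be supplied by the ANBO.txt metadata file. ::

    [SAD-ANBO]
    analysis = sad
    metadata = ANBO.txt
    models = logser_uptrunc; lognorm
    log_y = True

    [SAR-ANBO]
    analysis = sar
    metadata = ANBO.txt
    divs = 1,1; 2,2; 4,4
    models = power_law

When this file is run, MacroecoDesktop executes each run in turn and saves its output under the results folder described below.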
+ +Once the parameter file has been created and saved, MacroecoDesktop can be called either from the graphical MacroecoDesktop program or from the Terminal. + +For Mac users who have downloaded the standalone MacroecoDesktop application, double click to launch the program. Use the Open button near the top to find and open the new_parameters.txt file that you just created. The parameters file will appear, and it can be edited and saved here again if desired. Once the parameter file is opened, click the Run button near the bottom. When the line "Finished analysis successfully" appears in the bottom window, the analysis is complete and the results are available. The results will be found in a folder named "results" in the same location as the new_parameters.txt file. + +For users who wish to access MacroecoDesktop from the terminal and who have installed ``macroeco`` in their Python environment, simply run the command ``mecodesktop path/to/new_parameters.txt``. Output about the analysis progress will be printed in the Terminal window, and the results will eventually be saved in a folder named "results" in the same location as the new_parameters.txt file. + +Mac users who have downloaded the standalone MacroecoDesktop application can also access MacroecoDesktop from the command line if desired. Presuming that the MacroecoDesktop program has been placed in the Applications folder, the command to use is ``/Applications/MacroecoDesktop.app/Contents/MacOS/mecodesktop path/to/new_parameters.txt`` + +For information on performing more complex analyses using MacroecoDesktop, see :ref:`using-macroecodesktop`. + + + diff --git a/doc/tutorial_macroeco.rst b/doc/tutorial_macroeco.rst new file mode 100644 index 0000000..1bb507e --- /dev/null +++ b/doc/tutorial_macroeco.rst @@ -0,0 +1,9 @@ +.. _using-macroeco: + +============== +Using macroeco +============== + +This tutorial describes the basic usage of the ``macroeco`` Python package. Users who wish to use the high-level MacroecoDesktop interface should refer to the :ref:`using-macroecodesktop` tutorial. + +Coming soon. \ No newline at end of file diff --git a/doc/tutorial_macroeco_desktop.rst b/doc/tutorial_macroeco_desktop.rst new file mode 100644 index 0000000..0a680e7 --- /dev/null +++ b/doc/tutorial_macroeco_desktop.rst @@ -0,0 +1,185 @@ +.. _using-macroecodesktop: + +===================== +Using MacroecoDesktop +===================== + +This tutorial describes the basic usage of the the high-level MacroecoDesktop interface. Users who wish to use the ``macroeco`` Python package should refer to the :ref:`using-macroeco` tutorial. + +This tutorial builds on the :ref:`first-steps-macroeco-desktop` tutorial, which should be completed first. + +There are three basic types of analysis that can be completed using MacroecoDesktop: analysis of an empirical ecological pattern, fitting models to an empirical pattern, and exploration of a model of a macroecological pattern without empirical data. This tutorial describes these three types of analysis in turn. + +Analyzing Empirical Patterns +============================ + +The first step in analyzing an empirical data set is to prepare the table and metadata file for the empirical data set as described in the :ref:`own-data` tutorial. It is generally most convenient to place the data table (usually in csv format) and the metadata file in the same folder. + +The second step is to prepare a parameters file to describe the desired analysis. A parameter file has a section for each run that is part of an analysis. 
Each run is independent of the others, and multiple runs may be combined in a single parameter file for convenience (for example, several analyses may be run on a single data set, or a single metric may be calculated for many data sets). An example of a run performing a species area analysis for the demo data set is shown below. :: [SAR] analysis = sar metadata = ANBO.txt subset = column >= 2 cols = spp_col:spp; count_col:count; x_col:row; y_col:column splits = row:2 divs = 1,1; 1,2; 2,1; 2,2 ear = False Each run begins with a title that is enclosed in square brackets. The run title should not have any spaces in it. The first section following the run name contains a single variable ``analysis`` which gives the name of an empirical ecological pattern to analyze for this data set. The available empirical analyses in Macroeco |version| are .. currentmodule:: macroeco.empirical .. autosummary:: :toctree: generated/ sad ssad sar comm_grid o_ring The pages linked above from each analysis name describe the analysis and the different input parameters required to complete the analysis. Each of these input parameters can be specified here in the parameters file. For example, examining the ``sar`` metric above shows that this analysis takes five input parameters: * patch - a Patch object containing the empirical data * cols - a string associating column headers in the data table with their uses * splits - a string describing how and whether to split the data into multiple separate subsets before analysis * divs - the number of divisions along coordinate columns * ear - True or False, where True calculates an endemics area relationship and False calculates a species area relationship Each of these five input parameters is provided directly in the run shown above, with the exception of the ``patch`` parameter, which is described slightly differently. Although the descriptions below apply to the ``sar`` metric, many of the same input parameters are used by the other analysis metrics. In all empirical analyses in Macroeco, the first input parameter is a patch object. Instead of describing this object directly in MacroecoDesktop, the user first provides the ``metadata`` and, optionally, the ``subset`` input parameters. The first parameter, ``metadata``, gives the relative path to the metadata file from the parameter file (if the parameter file and metadata file are in the same folder, this is just the name of the metadata file). The second parameter, ``subset``, takes a subset of the empirical data for further analysis. Any logical mathematical statement beginning with a column name and ending with a number can be used here. For example, setting ``subset`` to ``year==2010; row < 2; spp=='cabr'`` would perform all subsequent calculations only for data in which the year column is 2010, the row column is less than 2, and the species column is equal to 'cabr'. Multiple conditions are separated by semicolons. In the example run above, the SAR will be calculated only for columns 2 and 3 of the data. The next input parameter for an SAR analysis is ``cols``, which is a string describing which column in the data table should be used for which "special columns" in analysis.
The five possible special columns are - spp_col - Unique species identifiers - count_col - Number of individuals at a location - x_col - x coordinate of location - y_col - y coordinate of location - energy_col - Energetic requirements of individual(s) at a location Analyses that do not have a spatial component (like a species abundance distribution without subsets or splits) require only spp_col and count_col (if one exists - if not, each record is taken to represent one individual). Spatial analyses, such as the species-area relationship, also require x_col and y_col. Energy metrics require energy_col. The ``cols`` parameter can also be set in the Description section of the metadata file, in which case it is not required here. If ``cols`` is set both in a run and in a metadata file, the value from the run takes precedence. The next input parameter is ``splits``, which provides a convenient way to divide a data set into separate analyses. The value ``year:split; row:2``, for example, would split the data set into unique years and also into two subplots along the row column, each of equal length. The value before the ``:`` is a column name, and the value after is either a number (if a numeric column is to be split into equal sized divisions) or the word "split" (if a column is to be split among all unique values). This parameter is particularly useful if a column for plot ID, family name, functional group, etc. is present in the data table, in which case splitting on that column would perform an identical analysis for each different plot, family, group, etc. ``splits`` can also be used, for example, to split a plot into four subplots along two coordinate axes and perform a species area analysis within each one. The next input parameter is ``divs``, which gives the number of divisions to perform along the x and y columns. For example, ``3,2`` will divide a plot into six subplots, with three "columns" formed by splitting the x axis into three parts and two "rows" formed by splitting the y axis into two parts. Here, ``1,1; 1,2; 2,1; 2,2`` will analyze the species area relationship for the entire plot, half plots (split in both directions), and quarter plots. The final input parameter, ``ear``, determines whether a species area or an endemics area relationship should be calculated. This is a boolean value that can be either True (endemics area relationship) or False (species area relationship). Once the parameters file has been created and saved, it can be executed using MacroecoDesktop by following the instructions at the end of the :ref:`first-steps-macroeco-desktop` tutorial. A sample parameter file containing runs that complete many of the above empirical analyses can be found in :ref:`recipes`. Fitting and Comparing Models of Empirical Patterns ================================================== MacroecoDesktop can also be used to fit models to empirical data patterns, analyze the goodness of fit of these models, and to compare the fits of multiple models. This process is identical to that described above for analyzing empirical patterns, except that one additional set of input parameters is added to a run in the parameters file.
:: [SAR] analysis = sar metadata = ANBO.txt subset = column >= 2 cols = spp_col:spp; count_col:count; x_col:row; y_col:column splits = row:2 divs = 1,1; 1,2; 2,1; 2,2 ear = False models = power_law log_x = true log_y = true The third portion of this run begins with the input parameter ``models``, which can be set equal to one or several of the models within the ``macroeco`` package. If the metric is a curve, such as the species area relationship, the following models may be used. .. currentmodule:: macroeco.models .. autosummary:: :toctree: generated/ power_law mete_sar mete_iterative_sar mete_ear mete_iterative_ear If the metric is a probability distribution, the following models may be used (note that some are discrete and some continuous). .. autosummary:: :toctree: generated/ expon expon_uptrunc lognorm geom geom_uptrunc nbinom cnbinom logser_uptrunc plnorm plnorm_ztrunc More information about these models can be found by clicking on their names above. Some of these models have additional optional parameters that can be provided here (see the Methods section of the page for each individual model). Two special input parameters, ``log_x`` and ``log_y``, are used to log transform the x and y axes of the output plots created by MacroecoDesktop. As another example, the run below will calculate a species abundance distribution for the demo data set, fit both a lognormal and upper-truncated logseries distribution to the empirical data, and compare their fits. :: [SAD] analysis = sad metadata = ANBO.txt models = logser_uptrunc; lognorm log_y = True Note that no subsets or splits are given here, so that the entire data table is used for the analysis. The ``cols`` parameter is also not given, and the value of this parameter from the metadata file is used as a result. Exploring Models ================ Finally, MacroecoDesktop may also be used to explore the behavior of models without specific reference to empirical data. Given a set of model parameters, the "y" values of curves may be calculated for any "x" values, and the probability density, cumulative density, random variates, and many other values may be calculated for probability distributions. To see the possible options for exploring models, choose a model from the lists above and refer to the Methods section of that page. Any model and any method may be used with MacroecoDesktop so long as all of the input parameters required by that method are provided. Note that although the ``loc`` and ``scale`` parameters are listed for distributions, these are not used by Macroeco and should not be entered in a parameters file. For example, the parameter file below contains runs that calculate the pmf of a geometric distribution with a known shape parameter ``p``, calculate the ``p`` parameter of the upper-truncated geometric distribution from the distribution mean and upper limit ``b``, fit the parameters of a lognormal distribution to a small data set, and draw 10 random variables from a conditioned negative binomial distribution.
::
+
+    [Geom-pmf]
+    analysis = geom.pmf
+
+    x = 0,1,2,3,4,5
+    p = 0.5
+
+    [GeomUptrunc-p]
+    analysis = geom_uptrunc.translate_args
+
+    mu = 5
+    b = 20
+
+    [Lognorm-fit]
+    analysis = lognorm.fit_mle
+
+    data = 2,2,5,8,4,3
+
+    [Cnbinom-random]
+    analysis = cnbinom.rvs
+
+    mu = 10
+    k_agg = 2
+    b = 15
+    size = 10
+
diff --git a/doc/tutorial_own_data.rst b/doc/tutorial_own_data.rst
new file mode 100644
index 0000000..ba558dc
--- /dev/null
+++ b/doc/tutorial_own_data.rst
@@ -0,0 +1,88 @@
+.. _own-data:
+
+==============
+Preparing Data
+==============
+
+Both data tables and metadata must be provided to MacroecoDesktop and the package ``macroeco`` for empirical analyses. Data should be placed in a csv file following the basic structure described below. Metadata must also be prepared to describe features of the data table that cannot be inferred from the table itself (for example, the minimum and maximum values of the extent of a census, as these may be smaller and larger, respectively, than the minimum and maximum coordinates of recorded individuals).
+
+.. note::
+    To avoid the possibility of errors, the names of the data table and metadata files should not contain any spaces. Additionally, the column headers within the data table must not contain any spaces.
+
+Preparing Data Tables
+---------------------
+
+Data should be prepared as a csv (comma separated values) file. The first row should contain column names, and each subsequent row should refer to a single record, most commonly a combination of a species identifier and a coordinate location. For point census data, each record will identify a single individual, while gridded census data will generally have an additional "count" column that gives the number of individuals of a species found in a grid cell.
+
+Other columns, such as those identifying genera, plot ID, etc., may also be included. The ``splits`` argument used by the empirical data analysis functions can be easily used to divide the analysis according to the values found in any provided column. For example, splitting a species area analysis on a column containing a plot ID will perform a separate species area analysis for each unique plot.
+
+The demo data file ANBO.csv provides an example of a correctly formatted data table file.
+
+Preparing a Metadata File
+-------------------------
+
+Macroeco requires a metadata file to be provided along with each data table file (both MacroecoDesktop and ``macroeco`` require the user to provide the location of a metadata file, not the data table itself). The metadata file contains basic descriptive information about the data table as well as parameter values that are necessary for empirical data analysis.
+
+The format of the metadata file is very similar to the parameters files used to describe analyses for MacroecoDesktop. A metadata file has an initial section called Description, followed by a section containing information for each column in the data table.
+
+The metadata file ANBO.txt is shown here. ::
+
+    [Description]
+    name = Anzo Borrego
+    author = Mary Ellen Harte and John Harte
+    description = Vegetation census conducted at Anza-Borrego Desert State Park. Site in Indian Valley at N 32' 52.091", W 116' 14.447". Elevation 1195 feet. Census was conducted on a 4 m x 4 m grid, with 16 grid cells each 1 m2 in area.
+    citation = Unpublished
+
+    datapath = ANBO.csv
+    cols = spp_col:spp; count_col: count; x_col: row; y_col: column
+
+    [year]
+    description = Year of census
+
+    [cell]
+    description = Unique cell identifier, from 0 to 15 (total of 16 cells)
+
+    [row]
+    description = Row of cell in gridded plot
+    min = 0
+    max = 3
+    step = 1
+
+    [column]
+    description = Column of cell in gridded plot
+    min = 0
+    max = 3
+    step = 1
+
+    [spp]
+    description = Name of species
+
+    [count]
+    description = Number of individuals of a species in a cell
+
+The initial section, Description, begins with a number of variables providing basic information on the data table.
+
+The ``datapath`` variable in this section gives the location of the data table file relative to the metadata file. If the data file and metadata file are in the same directory, as is usually the case, then datapath is simply the name of the data table file.
+
+The ``cols`` variable here provides an opportunity to identify the columns in the data table that indicate different values used in empirical data analysis. The four special columns shown here, which are common to most data tables, are
+
+* spp_col - the species identifier
+* count_col - optional column with the number of individuals of a species at a point (if count_col is not given, each row is taken to represent a single individual)
+* x_col - the x coordinate of the record location
+* y_col - the y coordinate of the record location
+
+The value of ``cols`` can also be specified separately in individual runs in MacroecoDesktop or when calling individual functions in ``macroeco``. The values given here in the metadata file are the defaults which are used if ``cols`` is not specified in these other locations.
+
+The remaining sections each refer to a column in the data table. Each section begins with a short description of the data in that column. Additionally, numeric columns (any column that can be split or subset by a numeric value) must have a minimum and maximum value and a step size giving the precision of the census. These are most commonly used with coordinate columns where, for example, the min and max values give the extent of the census and the step gives the minimum distance between two individuals.
+
+The demo metadata file ANBO.txt contains the metadata shown above.
+
+Using Data Files with Macroeco
+------------------------------
+
+Once the data and metadata files are prepared, they can be used with both MacroecoDesktop and ``macroeco``.
+
+In MacroecoDesktop, each run that involves empirical data analysis must contain the variable ``metadata``, which should indicate the path of the metadata file relative to the parameters file. If the parameters file and the metadata file are in the same folder, this is simply the name of the metadata file.
+
+In ``macroeco``, the absolute path to the metadata file (or the relative path from the present working directory) is a required argument to the Patch class of the empirical subpackage. Patch objects are required for all empirical pattern analysis using the functions in empirical.
+
diff --git a/doc/tutorial_recipes.rst b/doc/tutorial_recipes.rst
new file mode 100644
index 0000000..ad93f86
--- /dev/null
+++ b/doc/tutorial_recipes.rst
@@ -0,0 +1,108 @@
+..
_recipes: + +======================= +MacroecoDesktop Recipes +======================= + +To provide a "jump start" on setting up analyses for MacroecoDesktop, the sample parameter file below contains a variety of runs that perform different types of calculations on the demo dataset provided with Macroeco. This file, or individual runs from this file (consisting of a run title in square brackets and all subsequent lines until the next run title), can be copied and pasted into parameters files and modified as needed. + +The lines beginning with the ``#`` symbol are comments. They are purely for information and are ignored by MacroecoDesktop. In some cases below, lines containing variables are prefaced by the ``#`` symbol, indicating that they are "commented out" and will not affect the analysis. Removing the ``#`` at the start of these lines will have the effect described in the associated comment for that line. :: + + # The runs below provide examples of empirical data analysis, some with + # model comparisons. + + # A simple species abundance distribution for the full plot + [SAD] + analysis = sad + + metadata = ANBO.txt + + models = logser_uptrunc; lognorm + log_y = True # Log transform the y axis of output plots + + # Four separate SAD's for the four quadrants of the plot + # cols is only required if it is not set in the metadata file + [SAD4] + analysis = sad + + metadata = ANBO.txt + #cols = spp_col:spp; count_col:count; x_col:row; y_col:column + splits = row:2; column:2 + clean = True # Remove species with 0 individuals from SADs + + models = logser_uptrunc; lognorm + log_y = True # Log transform the y axis of output plots + + # Empirical spatial abundance distribution for all 16 cells + [SSAD] + analysis = ssad + + metadata = ANBO.txt + divs = 4,4; + + # Species area relationship + [SAR ANBO] + analysis = sar + + metadata = ANBO.txt + divs = 1,1;1,2;2,1;2,2;2,4;4,4 + + models = mete_iterative_sar + #ear = True # Endemics area relationship instead of species area + log_y = True + log_x = True + + # Gridded commonality, calculating Sorensen index for each pair of cells + [Commonality] + analysis = comm_grid + + metadata = ANBO.txt + #subset = row>=2;column>=2 # Use only cells in rows 2-3 and columns 2-3 + cols = spp_col:spp; count_col:count; x_col:row; y_col:column + #splits = row:2 # Perform analysis once for rows 0-1 and again for 2-3 + divs = 2,2; + #metric = Jaccard # Use Jaccard instead of Sorensen index + + models = power_law + + # O ring measure of distance decay + # This measure is best suited to point count census data + [Oring] + analysis = o_ring + + metadata = ANBO.txt + cols = spp_col:spp; count_col:count; x_col:row; y_col:column + spp = 'crcr' + bin_edges = 0, 1, 2, 3, 4 + + # The runs below provide examples of model exploration + + # pmf of geometric distribution + [Geom-pmf] + analysis = geom.pmf + + x = range(10) # x values from 0 to 9 + p = 0.5 + + # Shape parameter of upper truncated geometric distribution + [GeomUptrunc-p] + analysis = geom_uptrunc.translate_args + + mu = 5 + b = 20 + + # Fit parameters of lognormal to a small data set + [Lognorm-fit] + analysis = lognorm.fit_mle + + data = 2,2,5,8,4,3 + + + # Draw random variates from a conditioned negative binomial distribution + [Cnbinom-random] + analysis = cnbinom.rvs + + mu = 10 + k_agg = 2 + b = 15 + size = 10 diff --git a/doc/tutorial_with_r.rst b/doc/tutorial_with_r.rst new file mode 100644 index 0000000..e9e2b0d --- /dev/null +++ b/doc/tutorial_with_r.rst @@ -0,0 +1,43 @@ +=========================== 
+MacroecoDesktop for R users +=========================== + +Users who primarily work in R can access the functionality of Macroeco through the command line MacroecoDesktop interface. + +First, install a working copy of MacroecoDesktop by following the installation instructions in :ref:`installation`. Windows and Linux users will need to install a Python environment and the ``macroeco`` package, while Mac users can instead install the standalone MacroecoDesktop program. Follow the :ref:`first-steps-macroeco-desktop` tutorial to create the "new_parameters.txt" file and ensure that your copy of MacroecoDesktop is working properly. + +For all platforms and installation options, the basic idea will be to call MacroecoDesktop from an R script using the command line interface, wait for the analysis to complete, and then read in any output tables saved by MacroecoDesktop that will be used for further analysis. + +As an example, the script below completes the following steps: + +* Writes a "new_parameters.txt" file describing a desired MacroecoDesktop analysis +* Uses the ``system`` command within R to execute the MacroecoDesktop analysis specified in "new_parameters.txt" +* Reads in the resulting data tables +* Plots gridded distance decay data with a best fit power law curve +* Prints out the R2 value for the power law fit to the data:: + + param_dir <- "~/Desktop/demo/" + param_file <- "new_parameters.txt" + + cat(" + [DistanceDecay] + + analysis = comm_grid + + metadata = ANBO.txt + cols = spp_col: spp; count_col: count; y_col: row; x_col: column + divs = 4,4; + models = power_law + ",file=paste(param_dir,param_file,sep=""), sep="\n") + + system(paste("mecodesktop ", param_dir, param_file, sep="")) + + data_models <- read.csv(paste(param_dir, "results/DistanceDecay/1_data_models.csv", sep="")) + test_statistics <- read.csv(paste(param_dir, "results/DistanceDecay/1_test_statistics.csv", sep="")) + + plot(data_models$x, data_models$empirical) + lines(data_models$x, data_models$power_law) + + test_statistics$R2 + +Mac users who installed the standalone MacroecoDesktop program should replace ``"mecodesktop "`` above with ``"/Applications/MacroecoDesktop.app/Contents/MacOS/mecodesktop "``. diff --git a/doc/tutorials.rst b/doc/tutorials.rst new file mode 100644 index 0000000..981dd74 --- /dev/null +++ b/doc/tutorials.rst @@ -0,0 +1,13 @@ +========= +Tutorials +========= + +.. toctree:: + :maxdepth: 1 + + tutorial_getting_started + tutorial_macroeco + tutorial_macroeco_desktop + tutorial_own_data + tutorial_with_r + tutorial_recipes diff --git a/macroeco/__init__.py b/macroeco/__init__.py index 9cf8579..ee9366b 100644 --- a/macroeco/__init__.py +++ b/macroeco/__init__.py @@ -19,10 +19,9 @@ .. toctree:: :maxdepth: 2 + about tutorials reference - about - """ diff --git a/macroeco/compare/__init__.py b/macroeco/compare/__init__.py index 9a82519..1e77d3d 100644 --- a/macroeco/compare/__init__.py +++ b/macroeco/compare/__init__.py @@ -7,9 +7,6 @@ distribution/curve to data or the fit of two distributions/curves to each other. -Comparison Functions -==================== - .. autosummary:: :toctree: generated/ diff --git a/macroeco/empirical/__init__.py b/macroeco/empirical/__init__.py index 457b776..5444e8d 100644 --- a/macroeco/empirical/__init__.py +++ b/macroeco/empirical/__init__.py @@ -3,13 +3,15 @@ Empirical (:mod:`macroeco.empirical`) ===================================== -This module contains functions used in the empirical analysis of -macroecological patterns. 
+This module contains functions used in the analysis of ecological patterns in +empirical data sets. Patch ===== -Patch is a class. +Patch is the core class of the empirical module. It reads and validates +metadata and data table files, and patch objects are the first argument to all +of the empirical metric functions in this module. .. autosummary:: :toctree: generated/ @@ -19,6 +21,9 @@ Metrics ======= +Each of these functions calculates an empirical ecological metric for a given +patch object. + .. autosummary:: :toctree: generated/ @@ -26,6 +31,7 @@ ssad sar comm_grid + o_ring Other ===== diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py index afe968b..3bc6db1 100644 --- a/macroeco/empirical/_empirical.py +++ b/macroeco/empirical/_empirical.py @@ -36,22 +36,31 @@ second element is a dataframe giving the result.""" cols_note = \ - """The parameter ``cols`` is a dictionary with keys for four special - columns and values giving the column name in the patch data table - associated with each special column. + """The parameter ``cols`` is a string describing which column in the data + table should be used for which "special columns" in analysis. The five + possible special columns are - spp_col - Unique species identifiers - count_col - Number of individuals at a location - - energy_col - Energy of individuals - - mass_cal - Mass of individuals - - Only spp_col is always mandatory. Note that the value of spp_col may be - set to a columm in the data table giving the genus, family, functional - group, etc., which allows for analysis of this metric by those groups. - count_col is used when multiple individuals of a species may be found at - a single recorded location, as is the case in gridded censuses where all - individuals in a quadrat are "assigned" to a single point. energy_col - and mass_col are used for energy-based metrics.""" + - x_col - x coordinate of location + - y_col - y coordinate of location + - energy_col - Energetic requirements of individual(s) at a location + + For example, setting ``cols`` to ``spp_col: spp; count_col: number`` will + use the column named "spp" in the data table to represent the unique + species identifiers, and the column "number" in the data table to represent + the count of individuals at a point. + + Different special columns are required for different analyses. count_col is + used when multiple individuals of a species may be found at a single + recorded location, as is the case in gridded censuses where all individuals + in a quadrat are "assigned" to a single point. If count_col is not + specified, each record in the data table will be presumed to represent a + single individual (i.e., a count of 1). + + Note that the value of spp_col may be set to a column in the data table + giving the genus, family, functional group, etc., which allows for analysis + of this metric by those groups. """ splits_note = \ """The parameter ``splits`` is a semicolon-separated string in the form of diff --git a/macroeco/main/__init__.py b/macroeco/main/__init__.py index 4019aaa..d8ba388 100644 --- a/macroeco/main/__init__.py +++ b/macroeco/main/__init__.py @@ -3,9 +3,7 @@ Main (:mod:`macroeco.main`) =========================== -This module contains functions that execute macroecological analyses specified -by user-generated `parameters.txt` configuration files. Instructions for -creating parameter files can be found here. +This module contains the functions that make up MacroecoDesktop. .. 
autosummary:: :toctree: generated/ diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 3a7d1d9..612293b 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -168,6 +168,7 @@ def _call_analysis_function(options, module): """ args, kwargs = _get_args_kwargs(options, module) + print args, kwargs return eval("%s.%s(*args, **kwargs)" % (module, options['analysis'])) @@ -179,6 +180,7 @@ def _get_args_kwargs(options, module): if module == 'emp': options = _emp_extra_options(options) arg_names, kw_names = _arg_kwarg_lists(module, options['analysis']) + print kw_names # Create list of values for arg_names args = [] @@ -260,6 +262,8 @@ def _arg_kwarg_lists(module, analysis): if obj_meth[1] not in ['fit_mle', 'translate_args']: arg_names += eval(module + '.' + obj_meth[0] + '.' + "shapes.replace(' ','').split(',')") + if obj_meth[1] == 'rvs': # Inspection for size not working + kw_names.append('size') except: pass diff --git a/macroeco/misc/__init__.py b/macroeco/misc/__init__.py index 1759d14..9bae656 100644 --- a/macroeco/misc/__init__.py +++ b/macroeco/misc/__init__.py @@ -6,13 +6,28 @@ This module contains miscellaneous functions that support the functions of other modules of macroeco. +Support Functions +================= + .. autosummary:: :toctree: generated/ setup_log + log_start_end inherit_docstring_from doc_sub - log_start_end + +""" +""" + +Data Formatting Functions +========================= + +.. autosummary:: + :toctree: generated/ + + data_read_write + format_dense """ diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py index 819bad1..d99c08d 100644 --- a/macroeco/models/__init__.py +++ b/macroeco/models/__init__.py @@ -3,27 +3,24 @@ Models (:mod:`macroeco.models`) =============================== -This module contains distributions commonly used in analysis of ecological -patterns. At present, all distributions here are univariate. +This module contains distributions and curves (i.e., standard mathematical +functions) commonly used in analysis of ecological patterns. -Most of these distributions are subclasses of `~scipy.stats.rv_continuous` and -`~scipy.stats.rv_discrete` found in `scipy.stats`. Additionally, several of the -distribution classes here are simple wrappers for existing distributions found -in `scipy.stats` that are updated to allow the use of common ecological -parameterizations. +Distributions +============= -Continouous distributions -========================= +All of the distributions here are subclasses of either +`~scipy.stats.rv_continuous` and `~scipy.stats.rv_discrete` found in +`scipy.stats`. Several of the distributions here are similar to or based on +existing distributions found in `scipy.stats` but are updated to allow the use +of common ecological parameterizations. -.. autosummary:: - :toctree: generated/ - - expon - expon_uptrunc - lognorm +In addition to all of the methods found in `scipy.stats`, methods for fitting +distributions and curves to data and for translating common distribution +arguments into formal parameters (i.e., deriving the ``p`` of the geometric +distribution from the distribution mean) are also provided in these classes. -Discrete distributions -====================== +The following discrete distributions are available. .. autosummary:: :toctree: generated/ @@ -36,6 +33,29 @@ plnorm plnorm_ztrunc +The following continuous distributions are available. + +.. 
autosummary:: + :toctree: generated/ + + expon + expon_uptrunc + lognorm + +Curves +====== + +Several common curves used in ecologial analysis are included here. + +.. autosummary:: + :toctree: generated/ + + power_law + mete_sar + mete_iterative_sar + mete_ear + mete_iterative_ear + """ from _distributions import (geom, geom_uptrunc, nbinom, cnbinom, diff --git a/macroeco/models/_curves.py b/macroeco/models/_curves.py index b416265..6724f68 100644 --- a/macroeco/models/_curves.py +++ b/macroeco/models/_curves.py @@ -236,14 +236,10 @@ def fit_lsq(self, patch, cols, SAD_model_name, SSAD_model_name): class mete_sar_gen(curve): """ - The SAR predicted by the Maximum Entropy Theory of Ecology + A SAR/EAR predicted by the Maximum Entropy Theory of Ecology - .. math:: - - S = c x^z - - The generic SAR may be used either for downscaling, when values of A are - less than A0, or upscaling, when values of A are greater than A0. + The METE SAR and EAR may be used either for downscaling, when values of A + are less than A0, or upscaling, when values of A are greater than A0. Downscaling creates the traditional SAR known to ecologists, while upscaling is useful for estimating large-scale species richness from small- scale plot data. @@ -251,8 +247,8 @@ class mete_sar_gen(curve): A keyword argument iterative is available (default is False). If True, the SAR is calculated at successive A values, with the result at each value of A used as the base values of S and N for the subsequent calculation. The - iterative form was used in is the form used in Harte et al [#]_, although - note that the implementation here uses a different internal equation. + iterative form was used in Harte et al [#]_, although note that the + implementation here uses a different internal equation. Methods ------- @@ -376,6 +372,8 @@ def fit_lsq(self, df): and SSAD mdoels based on SAR output. Name ``fit_lsq`` is retained for consistency with other curves. + The first row of the empirical dataframe must be for an area A = A0. 
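+
+        A minimal sketch of the expected input, using hypothetical values and
+        assuming ``pandas`` is imported as ``pd`` (the A = A0 row comes first)::
+
+            df = pd.DataFrame({'n_spp': [30, 18, 10],
+                               'n_individs': [500, 250, 125]})
+            mete_sar.fit_lsq(df)  # returns (30, 500)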
+ """ # Just return S0 and N0 at largest scale, which is first row of df return df['n_spp'].values[0], df['n_individs'].values[0] From 0e0aea9985705f13fdb97fd7992ca7058d5294e9 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 24 Apr 2014 09:39:58 -0700 Subject: [PATCH 290/343] Remove print statements from debugging --- macroeco/main/main.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 612293b..93e81b3 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -168,7 +168,6 @@ def _call_analysis_function(options, module): """ args, kwargs = _get_args_kwargs(options, module) - print args, kwargs return eval("%s.%s(*args, **kwargs)" % (module, options['analysis'])) @@ -180,7 +179,6 @@ def _get_args_kwargs(options, module): if module == 'emp': options = _emp_extra_options(options) arg_names, kw_names = _arg_kwarg_lists(module, options['analysis']) - print kw_names # Create list of values for arg_names args = [] From 84cd45410ac04025fcc03dcd49375bbb443450cd Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 24 Apr 2014 09:40:10 -0700 Subject: [PATCH 291/343] Remove space from MacroecoDesktop app name --- mecodesktop_mac.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mecodesktop_mac.spec b/mecodesktop_mac.spec index 6124016..17d38a9 100644 --- a/mecodesktop_mac.spec +++ b/mecodesktop_mac.spec @@ -23,6 +23,6 @@ a.binaries + [('libwx_osx_cocoau-3.0.0.0.0.dylib', upx=True, name='mecodesktop') app = BUNDLE(coll, - name='Macroeco Desktop.app', + name='MacroecoDesktop.app', icon='icon.icns') From 27ff4cabde3597dcf9de72d83e7238e31bf660c4 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 24 Apr 2014 09:40:30 -0700 Subject: [PATCH 292/343] Throw more useful error in o_ring if shapely not available --- macroeco/empirical/_empirical.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py index 3bc6db1..98ce25b 100644 --- a/macroeco/empirical/_empirical.py +++ b/macroeco/empirical/_empirical.py @@ -712,6 +712,11 @@ def o_ring(patch, cols, splits, spp, bin_edges, density=True): """ + try: + geo.box(0, 0, 1, 1) + except: + raise ImportError, "O-ring analysis requires shapely package" + (spp_col, count_col, x_col, y_col), patch = \ _get_cols(['spp_col', 'count_col', 'x_col', 'y_col'], cols, patch) From 9ef8d527c6341b94807af2673d32f4b7652e5a3a Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 24 Apr 2014 09:47:46 -0700 Subject: [PATCH 293/343] Small edits to doc index page --- doc/reference.rst | 2 ++ macroeco/__init__.py | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/reference.rst b/doc/reference.rst index b247c1d..8a25a0f 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -1,3 +1,5 @@ +.. _reference: + ========= Reference ========= diff --git a/macroeco/__init__.py b/macroeco/__init__.py index ee9366b..645c0b7 100644 --- a/macroeco/__init__.py +++ b/macroeco/__init__.py @@ -12,16 +12,16 @@ Macroeco can be used either as a scientific python Package or through a high- level interface called MacroecoDesktop. Users new to Macroeco should begin by -reviewing all of the tutorials found below. Experienced Python programmers who -wish to use the ``macroeco`` Python package can ``pip install macroeco`` and -refer to the Using macroeco tutorial and the Reference guide. +reviewing the tutorials found below. 
Experienced Python programmers who wish to +use the ``macroeco`` Python package can ``pip install macroeco`` and refer to +the :ref:`using-macroeco` tutorial and the :ref:`reference` guide. .. toctree:: :maxdepth: 2 - about tutorials reference + about """ From b1e3aaf4d49b041e472b8728b0de70448b638633 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 24 Apr 2014 11:08:36 -0700 Subject: [PATCH 294/343] Fix divs/splits synatx in SSAD recipe --- doc/tutorial_recipes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/tutorial_recipes.rst b/doc/tutorial_recipes.rst index ad93f86..634384d 100644 --- a/doc/tutorial_recipes.rst +++ b/doc/tutorial_recipes.rst @@ -38,7 +38,7 @@ The lines beginning with the ``#`` symbol are comments. They are purely for info analysis = ssad metadata = ANBO.txt - divs = 4,4; + splits = row: 4; column: 4 # Species area relationship [SAR ANBO] From 03d41b5f209d30c766bcac6d41fdca54c28eb7d8 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sat, 10 May 2014 11:24:02 -0700 Subject: [PATCH 295/343] Fixed conflict in empirical --- macroeco/empirical/_empirical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py index 98ce25b..8a28283 100644 --- a/macroeco/empirical/_empirical.py +++ b/macroeco/empirical/_empirical.py @@ -445,7 +445,7 @@ def sar(patch, cols, splits, divs, ear=False): Returns ------- - {1} Result has three columns, div, x, and y, that give the ID for the + {1} Result has 5 columns; div, x, and y; that give the ID for the division given as an argument, fractional area, and the mean species richness at that division. From 8ac89d7217743ed74a46552e0ad6be80316e409a Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 12 Jun 2014 18:07:31 -0700 Subject: [PATCH 296/343] Building discrete gamma --- macroeco/models/__init__.py | 2 +- macroeco/models/_distributions.py | 59 +++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py index d99c08d..6b8da7b 100644 --- a/macroeco/models/__init__.py +++ b/macroeco/models/__init__.py @@ -60,7 +60,7 @@ from _distributions import (geom, geom_uptrunc, nbinom, cnbinom, logser_uptrunc, plnorm, plnorm_ztrunc, - expon, expon_uptrunc, lognorm) + expon, expon_uptrunc, lognorm, dgamma) from ._curves import (power_law, mete_sar, mete_iterative_sar, diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index e893c69..1ef4870 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -372,6 +372,65 @@ def p_eq(x, mu, b): _geom_solve_p_from_mu_vect = np.vectorize(_geom_solve_p_from_mu) + +class dgamma_gen(rv_discrete_meco): + r""" + A discrete gamma random variable + + From Frank 2011 + """ + + @inherit_docstring_from(rv_discrete_meco) + def translate_args(self, alpha, theta): + return alpha, theta + + @inherit_docstring_from(rv_discrete_meco) + def fit_mle(self, data): + + alpha0 = 1 + theta0 = .9 + b = np.sum(data) + + def mle(params): + return -np.sum(np.log(self.pmf(data, params[0], params[1], b))) + + # Bounded fmin? 
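+        # Nelder-Mead simplex search over (alpha, theta) starting from the
+        # initial guesses above; optim.fmin is unconstrained, so a bounded
+        # optimizer (e.g. optim.fmin_l_bfgs_b) could be substituted here.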
+ alpha, theta = optim.fmin(mle, x0=[alpha0, theta0], disp=0) + + return alpha, theta, b + + def _pmf(self, x, alpha, theta, b): + + #b = 1e5 # Upper cutoff + eq = lambda val, talpha, ttheta: val**(talpha - 1) * ttheta**val + + norm = np.sum(eq(np.arange(1, b[0] + 1), alpha[0], theta[0])) + + return eq(x, alpha, theta) / norm + + def _cdf(self, x, alpha, theta, b): + + # Repeating code from plnorm...can we make this more generic? + alpha = np.atleast_1d(alpha) + theta = np.atleast_1d(theta) + b = np.atleast_1d(b) + x = np.atleast_1d(x) + + max_x = np.max(x) + pmf_list = self.pmf(np.arange(np.int(max_x) + 1), alpha[0], theta[0], + b[0]) + full_cdf = np.cumsum(pmf_list) + + cdf = np.array([full_cdf[tx] for tx in x]) + + return cdf + + def _argcheck(self, alpha, theta, b): + return True + +dgamma = dgamma_gen(name='dgamma', shapes='alpha, theta, b') + + class nbinom_gen(rv_discrete_meco): r""" A negative binomial discrete random variable. From 5dc71cdecdb062b6897a4f98438d10ceee628726 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sun, 15 Jun 2014 23:48:44 -0700 Subject: [PATCH 297/343] Added Pueyo bins --- macroeco/compare/_compare.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/macroeco/compare/_compare.py b/macroeco/compare/_compare.py index d2a9921..4232159 100644 --- a/macroeco/compare/_compare.py +++ b/macroeco/compare/_compare.py @@ -293,3 +293,33 @@ def preston_bin(data, max_num): hist_data = np.histogram(data, bins=boundaries) return hist_data + + +def pueyo_bins(data): + """ + Binning method based on Pueyo (2006) + + Parameters + ---------- + data : array-like data + Data to be binned + + Returns + ------- + : tuple of arrays + binned data, empirical probability density + + Notes + ----- + Bins the data in into bins of lenth 2**i, i=0, 1, 2 ... + The empirical probability densities will sum to 1 if multiplied by the + respective 2**i. + + """ + log_ub = np.ceil(np.log2(np.max(data))) + bins = 2**np.arange(log_ub + 1) + binned_data = np.histogram(data, bins=bins)[0] + epdf = (1 / bins[:-1]) * binned_data / len(data) + return binned_data, epdf + + From 89159082ef851bc2a9d4c5c94d81053d9b071c86 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sun, 15 Jun 2014 23:51:23 -0700 Subject: [PATCH 298/343] Fixed bug in logseries rank --- macroeco/models/_distributions.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 1ef4870..4ffd737 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -417,11 +417,11 @@ def _cdf(self, x, alpha, theta, b): x = np.atleast_1d(x) max_x = np.max(x) - pmf_list = self.pmf(np.arange(np.int(max_x) + 1), alpha[0], theta[0], + pmf_list = self.pmf(np.arange(1, np.int(max_x) + 1), alpha[0], theta[0], b[0]) full_cdf = np.cumsum(pmf_list) - cdf = np.array([full_cdf[tx] for tx in x]) + cdf = np.array([full_cdf[tx - 1] for tx in x]) return cdf @@ -703,6 +703,10 @@ def translate_args(self, mu, b): @inherit_docstring_from(rv_discrete_meco) def fit_mle(self, data, b=None): + """%(super)s +b : float + The upper bound of the distribution. 
If None, fixed at sum(data) + """ data = np.array(data) length = len(data) @@ -714,7 +718,7 @@ def fit_mle(self, data, b=None): def _pmf(self, x, p, b): - x = np.array(x) + x = np.atleast_1d(x) p = np.atleast_1d(p) b = np.atleast_1d(b) @@ -729,7 +733,7 @@ def _pmf(self, x, p, b): def _cdf(self, x, p, b): - x = np.array(x) + x = np.atleast_1d(x) p = np.atleast_1d(p) b = np.atleast_1d(b) From 7d2fbfd508d77bcbc4db6eb8d119d3e4e869233b Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sun, 15 Jun 2014 23:51:48 -0700 Subject: [PATCH 299/343] Truncated discrete gamma distribution --- macroeco/models/_distributions.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 4ffd737..9a93d15 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -401,12 +401,18 @@ def mle(params): def _pmf(self, x, alpha, theta, b): - #b = 1e5 # Upper cutoff + alpha = np.atleast_1d(alpha) + theta = np.atleast_1d(theta) + b = np.atleast_1d(b) + x = np.atleast_1d(x) + eq = lambda val, talpha, ttheta: val**(talpha - 1) * ttheta**val norm = np.sum(eq(np.arange(1, b[0] + 1), alpha[0], theta[0])) - return eq(x, alpha, theta) / norm + pmf = eq(x, alpha, theta) / norm + pmf[x > b] = 0 + return pmf def _cdf(self, x, alpha, theta, b): From 4475052639aafd058b3c77051645c536b479fc98 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sun, 15 Jun 2014 23:52:13 -0700 Subject: [PATCH 300/343] Added pueyo bins to init --- macroeco/compare/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macroeco/compare/__init__.py b/macroeco/compare/__init__.py index 1e77d3d..0c48c6d 100644 --- a/macroeco/compare/__init__.py +++ b/macroeco/compare/__init__.py @@ -22,4 +22,4 @@ from ._compare import (nll, lrt, AIC, AIC_compare, sum_of_squares, r_squared, - preston_bin) + preston_bin, pueyo_bins) From b4eda807f75cfb26e1d3cd4ca3fe348f314d3008 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sun, 15 Jun 2014 23:52:34 -0700 Subject: [PATCH 301/343] Started building logseries --- macroeco/models/_distributions.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 9a93d15..7354849 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -662,6 +662,14 @@ def _solve_k_from_mu(data, k_range, nll, *args): return k_array[min_nll_idx] +class logser_gen(rv_discrete_meco): + """ + Logseries random variable + """ + + def _pmf(x, p): + pass + class logser_uptrunc_gen(rv_discrete_meco): r""" From 0d52c49ec36c3159847a2734ee41921b15e41fbe Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 18 Jun 2014 20:07:58 -0700 Subject: [PATCH 302/343] Reparameterized dgamma --- macroeco/models/_distributions.py | 75 ++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 27 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 7354849..3165c27 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -377,64 +377,93 @@ class dgamma_gen(rv_discrete_meco): r""" A discrete gamma random variable - From Frank 2011 + .. math:: + + P(x) = k * x^(\alpha - 1) * e^{(-1 / \theta)*x} + + for ``x >= 1``, ``\alpha > 0`` and ``\theta > 0``. + ``k`` is the normalizing constant. + + Methods + ------- + translate_args(alpha, theta) + not used, returns alpha and thet. 
+ fit_mle(data) + ml estimate of shape parameters alpha and theta given data + %(before_notes)s + alpha : float + distribution parameter + theta : float + distribution parameter + + Notes + ----- + This parameterization of the discrete gamma was taken from [#]_. + + References + ---------- + .. [#] + Frank, F. (2011). Measurement scale in maximum entropy models of species + abundance. Journal of Evolutionary Biology, 24(3), 485-496 + """ @inherit_docstring_from(rv_discrete_meco) def translate_args(self, alpha, theta): return alpha, theta + @inherit_docstring_from(rv_discrete_meco) - def fit_mle(self, data): + def fit_mle(self, data, initial_guess=(1, .9), b=1e5): - alpha0 = 1 - theta0 = .9 - b = np.sum(data) + alpha0 = initial_guess[0] + theta0 = initial_guess[1] def mle(params): - return -np.sum(np.log(self.pmf(data, params[0], params[1], b))) + return -np.sum(np.log(self.pmf(data, params[0], params[1]))) # Bounded fmin? alpha, theta = optim.fmin(mle, x0=[alpha0, theta0], disp=0) - return alpha, theta, b + return alpha, theta - def _pmf(self, x, alpha, theta, b): + def _pmf(self, x, alpha, theta): + b = 1e5 alpha = np.atleast_1d(alpha) theta = np.atleast_1d(theta) b = np.atleast_1d(b) x = np.atleast_1d(x) - eq = lambda val, talpha, ttheta: val**(talpha - 1) * ttheta**val + eq = lambda val, talpha, ttheta: val**(talpha - 1) * \ + np.exp((-1 / ttheta)*val) norm = np.sum(eq(np.arange(1, b[0] + 1), alpha[0], theta[0])) pmf = eq(x, alpha, theta) / norm - pmf[x > b] = 0 return pmf - def _cdf(self, x, alpha, theta, b): + def _cdf(self, x, alpha, theta): - # Repeating code from plnorm...can we make this more generic? alpha = np.atleast_1d(alpha) theta = np.atleast_1d(theta) - b = np.atleast_1d(b) x = np.atleast_1d(x) max_x = np.max(x) - pmf_list = self.pmf(np.arange(1, np.int(max_x) + 1), alpha[0], theta[0], - b[0]) + pmf_list = self.pmf(np.arange(1, np.int(max_x) + 1), alpha[0], + theta[0]) full_cdf = np.cumsum(pmf_list) - cdf = np.array([full_cdf[tx - 1] for tx in x]) + cdf = np.array([full_cdf[tx - 1] if x != 0 else 0 for tx in x]) return cdf - def _argcheck(self, alpha, theta, b): - return True + def _argcheck(self, alpha, theta): + + # TODO: Can theta or alpha be 0 in the discrete version? 
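+        # scipy calls _argcheck to validate shape parameters before
+        # evaluating the distribution; both must be strictly positive here.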
+ return (alpha > 0 and theta > 0) -dgamma = dgamma_gen(name='dgamma', shapes='alpha, theta, b') +dgamma = dgamma_gen(name='dgamma', shapes='alpha, theta') class nbinom_gen(rv_discrete_meco): @@ -662,14 +691,6 @@ def _solve_k_from_mu(data, k_range, nll, *args): return k_array[min_nll_idx] -class logser_gen(rv_discrete_meco): - """ - Logseries random variable - """ - - def _pmf(x, p): - pass - class logser_uptrunc_gen(rv_discrete_meco): r""" From 7a2a28e1b3a9a482a657f5514fb79130d15aa40b Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 18 Jun 2014 20:08:18 -0700 Subject: [PATCH 303/343] Unittested dgamma --- macroeco/models/test_distributions.py | 37 +++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index 332f9e1..67e6003 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -208,6 +208,43 @@ def test_zillio_plots(self): # fig.savefig("test_cbinom") +class TestDgamma(TestCase): + + def test_pmf(self): + # import macroeco_distribution as mac + # mac.dis_gamma_ll([1,1,2,5,6,7], 5, .3) + test_val = -32.3085384957 + pred_val = np.sum(dgamma.logpmf([1, 1, 2, 5, 6, 7], 5, .3)) + assert_almost_equal(test_val, pred_val) + + # ab = [1, 1, 1, 1, 2, 4, 4, 4, 4, 4, 45, 267] + # mac.dis_gamma_ll(ab, 0.1, 200) + test_val = -39.889246913391531 + ab = [1, 1, 1, 1, 2, 4, 4, 4, 4, 4, 45, 267] + pred_val = np.sum(dgamma.logpmf(ab, 0.1, 200)) + assert_almost_equal(test_val, pred_val) + + def test_cdf(self): + # Test that cdf gets close to one + assert_almost_equal(dgamma.cdf(1000, 4, .9), 1) + + def test_fit_mle(self): + # mac.dis_gamma_solver([1,1,2,5,6,7]) + fit_alpha = 1.1324749 + fit_theta = 2.86753 + alpha, theta = dgamma.fit_mle([1, 1, 2, 5, 6, 7]) + assert_almost_equal(fit_alpha, alpha, decimal=3) + assert_almost_equal(fit_theta, theta, decimal=3) + + def test_rank(self): + # When alpha is almost zero should be similar to logseries with p = + # e^(-1 / theta) + logseries_rank = logser_uptrunc.rank(10, np.exp(-1 / 3), 1000) + dgamma_rank = dgamma.rank(10, 0.0001, 3) + + assert_array_equal(logseries_rank, dgamma_rank) + + class TestLogserUptrunc(TestCase): def test_pmf(self): From 3a95dbe1cf3de1e779d431c3b4aca35a97e313be Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 19 Jun 2014 12:25:06 -0700 Subject: [PATCH 304/343] Removed lower bound on dgamma alpha --- macroeco/models/_distributions.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 3165c27..17ea9eb 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -412,12 +412,13 @@ class dgamma_gen(rv_discrete_meco): def translate_args(self, alpha, theta): return alpha, theta - @inherit_docstring_from(rv_discrete_meco) - def fit_mle(self, data, initial_guess=(1, .9), b=1e5): + def fit_mle(self, data): - alpha0 = initial_guess[0] - theta0 = initial_guess[1] + mu = np.mean(data) + var = np.var(data, ddof=1) + alpha0 = mu / var + theta0 = mu / alpha0 def mle(params): return -np.sum(np.log(self.pmf(data, params[0], params[1]))) @@ -454,14 +455,14 @@ def _cdf(self, x, alpha, theta): theta[0]) full_cdf = np.cumsum(pmf_list) - cdf = np.array([full_cdf[tx - 1] if x != 0 else 0 for tx in x]) + cdf = np.array([full_cdf[tx - 1] if tx != 0 else 0 for tx in x]) return cdf def _argcheck(self, alpha, theta): # TODO: Can theta or alpha be 0 in the discrete version? 
- return (alpha > 0 and theta > 0) + return (theta > 0) dgamma = dgamma_gen(name='dgamma', shapes='alpha, theta') From a762eca8234ce9ab350f959afef65333da90ea08 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 24 Jun 2014 18:57:00 -0700 Subject: [PATCH 305/343] Added and tested deviance comparisons --- macroeco/compare/__init__.py | 2 +- macroeco/compare/_compare.py | 64 ++++++++++++++++++++++++++++++++ macroeco/compare/test_compare.py | 38 +++++++++++++++++++ 3 files changed, 103 insertions(+), 1 deletion(-) diff --git a/macroeco/compare/__init__.py b/macroeco/compare/__init__.py index 0c48c6d..2b34519 100644 --- a/macroeco/compare/__init__.py +++ b/macroeco/compare/__init__.py @@ -21,5 +21,5 @@ """ from ._compare import (nll, lrt, AIC, AIC_compare, - sum_of_squares, r_squared, + sum_of_squares, full_model_nll, deviance, r_squared, preston_bin, pueyo_bins) diff --git a/macroeco/compare/_compare.py b/macroeco/compare/_compare.py index 4232159..5ae16e7 100644 --- a/macroeco/compare/_compare.py +++ b/macroeco/compare/_compare.py @@ -175,6 +175,70 @@ def AIC_compare(aic_list): return delta, weights +def deviance(red_model_nll, full_model_nll): + """ + Calculates the deviance given the negative log-likelihood for a reduced + model and the negative log-likelihood for the full model. + + Parameters + ---------- + red_model_nll : float + Reduced model negative log-likelihood + full_model_nll : float + Full model negative log-likelihood + + Returns + ------- + : float + Deviance + + Notes + ----- + Deviance is 2 * (red_model_nll - full_model_nll) + + + """ + return 2 * (red_model_nll - full_model_nll) + + +@doc_sub(_data_doc) +def full_model_nll(data, model): + """ + Fits a full model to the data. Every data point has a parameter + + Parameters + ----------- + {0} + model : Scipy distribution object + The model to be fit to the data + + Returns + ------- + : float + Negative log likelihood of full model given data + + Notes + ----- + Full model log likelihoods are used when calculating deviance + + """ + try: + mle_params = [model.fit_mle(np.array([dp])) for dp in data] + except AttributeError: + try: + mle_params = [model.fit(np.array([dp])) for dp in data] + except AttributeError: + raise AttributeError("%s has no attribute fit_mle or fit" % + str(model)) + + try: + ll = [model(*mle_params[i]).logpmf(data[i]) for i in xrange(len(data))] + except: + ll = [model(*mle_params[i]).logpdf(data[i]) for i in xrange(len(data))] + + return -np.sum(ll) + + def sum_of_squares(obs, pred): """ Sum of squares between observed and predicted data diff --git a/macroeco/compare/test_compare.py b/macroeco/compare/test_compare.py index 97b32cc..3d7e023 100644 --- a/macroeco/compare/test_compare.py +++ b/macroeco/compare/test_compare.py @@ -7,6 +7,7 @@ from macroeco.compare import * import numpy as np import scipy.stats as stats +import macroeco.models as mod class TestNLL(TestCase): @@ -75,6 +76,43 @@ def test_aicc(self): assert_almost_equal(expected, aic1, decimal=5) +class TestFullModelNLL(TestCase): + + def test_correct_value_for_continuous_models(self): + + # Test that the full model returns what we expect + data = np.array([3, 4, 5]) + + models = [mod.lognorm] + for model in models: + + params = [model.fit_mle(np.array([td])) for td in data] + values = [model(*params[i]).logpdf(data[i]) for i in + xrange(len(data))] + pred_nll = -np.sum(values) + + test_nll = full_model_nll(data, model) + + assert_equal(pred_nll, test_nll) + + def test_correct_value_for_discrete_models(self): + + # Test that the full model 
returns what we expect + data = np.array([3, 4, 5]) + + models = [mod.nbinom] + for model in models: + + params = [model.fit_mle(np.array([td])) for td in data] + values = [model(*params[i]).logpmf(data[i]) for i in + xrange(len(data))] + pred_nll = -np.sum(values) + + test_nll = full_model_nll(data, model) + + assert_equal(pred_nll, test_nll) + + class TestAICCompare(TestCase): def test_aic_delta_and_weights(self): From 44b3e5be805b07d42102c3fe44927122d406463e Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 24 Jun 2014 18:57:40 -0700 Subject: [PATCH 306/343] Adjusted dgamma fitting for more stability --- macroeco/models/_distributions.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 17ea9eb..438971c 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -387,7 +387,7 @@ class dgamma_gen(rv_discrete_meco): Methods ------- translate_args(alpha, theta) - not used, returns alpha and thet. + not used, returns alpha and theta. fit_mle(data) ml estimate of shape parameters alpha and theta given data %(before_notes)s @@ -415,10 +415,14 @@ def translate_args(self, alpha, theta): @inherit_docstring_from(rv_discrete_meco) def fit_mle(self, data): - mu = np.mean(data) - var = np.var(data, ddof=1) - alpha0 = mu / var - theta0 = mu / alpha0 + if len(data) > 1: + mu = np.mean(data) + var = np.var(data) + theta0 = var / mu + alpha0 = mu / theta0 + else: + alpha0 = 10 + theta0 = 10 def mle(params): return -np.sum(np.log(self.pmf(data, params[0], params[1]))) @@ -436,8 +440,11 @@ def _pmf(self, x, alpha, theta): b = np.atleast_1d(b) x = np.atleast_1d(x) - eq = lambda val, talpha, ttheta: val**(talpha - 1) * \ - np.exp((-1 / ttheta)*val) + eq = lambda val, talpha, ttheta: np.exp((talpha - 1) * np.log(val) - + (val / ttheta)) + + # eq = lambda val, talpha, ttheta: val**(talpha - 1) * \ + # np.exp((-1 / ttheta)*val) norm = np.sum(eq(np.arange(1, b[0] + 1), alpha[0], theta[0])) @@ -513,6 +520,7 @@ def fit_mle(self, data, k_range=(0.1, 100, 0.1)): """ # todo: check and mention in docstring biases of mle for k_agg + data = np.array(data) mu = np.mean(data) return mu, _solve_k_from_mu(data, k_range, nbinom_nll, mu) From ca65d5ca6d3c23d130f993565332237a37ef4c08 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Thu, 26 Jun 2014 22:15:19 -0700 Subject: [PATCH 307/343] Forgot to commit this unittest --- macroeco/misc/test_format_data.py | 84 +++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 macroeco/misc/test_format_data.py diff --git a/macroeco/misc/test_format_data.py b/macroeco/misc/test_format_data.py new file mode 100644 index 0000000..9800055 --- /dev/null +++ b/macroeco/misc/test_format_data.py @@ -0,0 +1,84 @@ +from __future__ import division + +from numpy.testing import (TestCase, assert_equal, assert_array_equal, + assert_almost_equal, assert_array_almost_equal, + assert_allclose, assert_, assert_raises) + +import numpy as np +from macroeco.misc import * +import pandas as pd + +#TODO: Test data_read_write + + +class TestFormatData(TestCase): + + def test_simple_stack(self): + + # Test that stack gives the same answer as predicted by hand + test_data = pd.DataFrame({'row': [1, 2, 1, 2], + 'column': [1, 1, 2, 2], 'labelA': [1, 0, 3, 4], + 'labelB': [3, 2, 1, 4]}) + + expected = pd.DataFrame({'row': [1,1,2,2,1,1,2,2], 'column': + [1,1,1,1,2,2,2,2], 'label': np.tile(['labelA', 'labelB'], 4), + 'count': 
[1,3,0,2,3,1,4,4]}, columns=['row', 'column', 'label', + 'count']) + + stack = format_dense(test_data, ['row', 'column']) + assert_equal(np.all(stack == expected), True) + + def test_label_count_col(self): + # Test whether changing label count col work + test_data = pd.DataFrame({'year': ['02', '03'], 'spp1': [1, 2], + 'spp2': [3, 4]}) + + expected = pd.DataFrame({'year': np.repeat(['02', '03'], 2), 'spp': + np.tile(['spp1', 'spp2'], 2), 'ind': [1,3,2,4]}, columns=['year', + 'spp', 'ind']) + + stack = format_dense(test_data, ['year'], label_col="spp", + count_col="ind") + + print stack + print expected + + assert_equal(np.all(stack == expected), True) + + def test_drop_nan(self): + # Test whether dropping nan function works + + test_data = pd.DataFrame({'year': ['02', '03'], 'spp1': [1, np.nan], + 'spp2': [np.nan, 4]}) + + expected = pd.DataFrame({'year': ['02', '03'], 'label': + ['spp1', 'spp2'], 'count': [1,4]}, columns=['year', + 'label', 'count']) + + stack = format_dense(test_data, ['year'], drop_na=True) + + assert_equal(np.all(stack == expected), True) + + def test_nan_to_zero(self): + # Test whether setting nan to zero function works + + test_data = pd.DataFrame({'year': ['02', '03'], 'spp1': [1, np.nan], + 'spp2': [np.nan, 4]}) + + expected = pd.DataFrame({'year': np.repeat(['02', '03'], 2), 'label': + np.tile(['spp1', 'spp2'], 2), 'count': [1,0,0,4]}, columns=['year', + 'label', 'count']) + + stack = format_dense(test_data, ['year'], nan_to_zero=True) + + assert_equal(np.all(stack == expected), True) + + + + + + + + + + From 36b7142cdb0feaddbec37b9d404922257432e68b Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Fri, 27 Jun 2014 19:08:04 -0700 Subject: [PATCH 308/343] Spend of full deviance calc --- macroeco/compare/_compare.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/macroeco/compare/_compare.py b/macroeco/compare/_compare.py index 5ae16e7..1ddc015 100644 --- a/macroeco/compare/_compare.py +++ b/macroeco/compare/_compare.py @@ -222,19 +222,30 @@ def full_model_nll(data, model): Full model log likelihoods are used when calculating deviance """ + data = np.sort(data) + unique_data = np.unique(data) + try: - mle_params = [model.fit_mle(np.array([dp])) for dp in data] + mle_params = [model.fit_mle(np.array([dp])) for dp in unique_data] except AttributeError: try: - mle_params = [model.fit(np.array([dp])) for dp in data] + mle_params = [model.fit(np.array([dp])) for dp in unique_data] except AttributeError: raise AttributeError("%s has no attribute fit_mle or fit" % str(model)) + data_df = pd.DataFrame(unique_data, columns=["unq_data"]) + data_df['mle_params'] = mle_params + data_df.set_index("unq_data", inplace=True) + fitted_data = pd.DataFrame(np.arange(len(data)), index=data).join(data_df) + full_mle = fitted_data.mle_params + try: - ll = [model(*mle_params[i]).logpmf(data[i]) for i in xrange(len(data))] + ll = [model(*full_mle.iloc[i]).logpmf(data[i]) for i in + xrange(len(data))] except: - ll = [model(*mle_params[i]).logpdf(data[i]) for i in xrange(len(data))] + ll = [model(*full_mle.iloc[i]).logpdf(data[i]) for i in + xrange(len(data))] return -np.sum(ll) From e11ec06debd70ed0f85bda68f0ed1b464900086c Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sun, 29 Jun 2014 13:29:07 -0700 Subject: [PATCH 309/343] Allowed discrete gamma fit to take initial values --- macroeco/models/_distributions.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/macroeco/models/_distributions.py 
b/macroeco/models/_distributions.py index 438971c..66f6e02 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -413,7 +413,12 @@ def translate_args(self, alpha, theta): return alpha, theta @inherit_docstring_from(rv_discrete_meco) - def fit_mle(self, data): + def fit_mle(self, data, init_vals=(80, 80)): + """%(super)s + In addition to data, can take init_vals which allows the user to + specify initial values for (alpha, theta) during the optimization. + + """ if len(data) > 1: mu = np.mean(data) @@ -421,8 +426,8 @@ def fit_mle(self, data): theta0 = var / mu alpha0 = mu / theta0 else: - alpha0 = 10 - theta0 = 10 + alpha0 = init_vals[0] + theta0 = init_vals[1] def mle(params): return -np.sum(np.log(self.pmf(data, params[0], params[1]))) From 27c82558a7ccfadb98f2cae81f8c29080401846e Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Sun, 29 Jun 2014 13:29:32 -0700 Subject: [PATCH 310/343] full_nll fxn can take kwargs --- macroeco/compare/_compare.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/macroeco/compare/_compare.py b/macroeco/compare/_compare.py index 1ddc015..f519a43 100644 --- a/macroeco/compare/_compare.py +++ b/macroeco/compare/_compare.py @@ -202,7 +202,7 @@ def deviance(red_model_nll, full_model_nll): @doc_sub(_data_doc) -def full_model_nll(data, model): +def full_model_nll(data, model, **kwargs): """ Fits a full model to the data. Every data point has a parameter @@ -211,6 +211,8 @@ def full_model_nll(data, model): {0} model : Scipy distribution object The model to be fit to the data + kwargs : keyword args + Additional keyword arguments for model fitting procedure Returns ------- @@ -226,7 +228,7 @@ def full_model_nll(data, model): unique_data = np.unique(data) try: - mle_params = [model.fit_mle(np.array([dp])) for dp in unique_data] + mle_params = [model.fit_mle(np.array([dp]), **kwargs) for dp in unique_data] except AttributeError: try: mle_params = [model.fit(np.array([dp])) for dp in unique_data] From 8779c9fe9533e5f4b5b3f6ea63ae2527c3895b32 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 15 Jul 2014 00:04:27 -0700 Subject: [PATCH 311/343] Added docdict support for updated scipy --- macroeco/models/_distributions.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index e893c69..9298bb9 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -4,9 +4,14 @@ from decimal import Decimal import numpy as np import numpy.random as nprand +from scipy.stats.distributions import (rv_discrete, rv_continuous) + +try: + from scipy.stats.distributions import (docdict, docdict_discrete) +except ImportError: + # Scipy version '0.14.0' support + from scipy.stats._distn_infrastructure import (docdict, docdict_discrete) -from scipy.stats.distributions import (rv_discrete, rv_continuous, docdict, - docdict_discrete) import scipy.stats as stats import scipy.optimize as optim import scipy.special as special From cc9d28d1372f8194e64857a899b5145997776a49 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 12 Jun 2014 19:45:06 -0700 Subject: [PATCH 312/343] Correction to O-ring correction factor --- macroeco/empirical/_empirical.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py index 8a28283..acfd212 100644 --- a/macroeco/empirical/_empirical.py +++ b/macroeco/empirical/_empirical.py @@ -785,7 +785,10 @@ def 
o_ring(patch, cols, splits, spp, bin_edges, density=True): circ.boundary.length) hist = hist / corr_factor # Edge corrected hist - hist[corr_factor == 0] = 0 # If corr_factor 0, hist should be 0 + + # If none of ring inside plot, ignore by setting hist and area 0 + hist[corr_factor == 0] = 0 + torus_areas[corr_factor == 0] = 0 hists += hist areas += torus_areas From edc66d1afe166fe305e7c2768823825969db76a0 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 12 Jun 2014 19:45:45 -0700 Subject: [PATCH 313/343] Adjust default arguments for fitting k of NBDs --- macroeco/models/_distributions.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 9298bb9..7a7772d 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -413,18 +413,17 @@ def translate_args(self, mu, k_agg): return mu, k_agg @inherit_docstring_from(rv_discrete_meco) - def fit_mle(self, data, k_range=(0.1, 100, 0.1)): + def fit_mle(self, data, k_array=np.arange(0.1, 100, 0.1)): """%(super)s - In addition to data, gives an optional keyword argument k_range - contains a tuple of the start, stop, and step values to search for - k_agg. default is ``k_range=(0.1,100,0.1)``. A brute force search is - then used to find the parameter k_agg. + In addition to data, gives an optional keyword argument k_array + containing the values to search for k_agg. A brute force search is then + used to find the parameter k_agg. """ # todo: check and mention in docstring biases of mle for k_agg mu = np.mean(data) - return mu, _solve_k_from_mu(data, k_range, nbinom_nll, mu) + return mu, _solve_k_from_mu(data, k_array, nbinom_nll, mu) def _get_p_from_mu(self, mu, k_agg): return k_agg / (k_agg + mu) @@ -496,7 +495,7 @@ class cnbinom_gen(rv_discrete_meco): ------- translate_args(mu, k_agg, b) not used, returns mu, k_agg, and b. - fit_mle(data, k_range=(0.1,100,0.1)) + fit_mle(data, k_array=np.arange(0.1,100,0.1)) ml estimate of shape parameters mu and k_agg given data %(before_notes)s mu : float @@ -522,7 +521,7 @@ def translate_args(self, mu, k_agg, b): return mu, k_agg, b @inherit_docstring_from(rv_discrete_meco) - def fit_mle(self, data, b=None, k_range=(0.1, 100, 0.1)): + def fit_mle(self, data, b=None, k_array=np.arange(0.1, 100, 0.1)): data = np.array(data) mu = np.mean(data) @@ -530,7 +529,7 @@ def fit_mle(self, data, b=None, k_range=(0.1, 100, 0.1)): if not b: b = np.sum(data) - return mu, _solve_k_from_mu(data, k_range, _cnbinom_nll, mu, b), b + return mu, _solve_k_from_mu(data, k_array, _cnbinom_nll, mu, b), b def _pmf(self, x, mu, k_agg, b): return np.exp(self._logpmf(x, mu, k_agg, b)) @@ -571,7 +570,7 @@ def _ln_choose(n, k_agg): return gammaln(n + 1) - (gammaln(k_agg + 1) + gammaln(n - k_agg + 1)) -def _solve_k_from_mu(data, k_range, nll, *args): +def _solve_k_from_mu(data, k_array, nll, *args): """ For given args, return k_agg from searching some k_range. @@ -592,7 +591,6 @@ def _solve_k_from_mu(data, k_range, nll, *args): # TODO: See if a root finder like fminbound would work with Decimal used in # logpmf method (will this work with arrays?) 
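    # The brute force search below evaluates the negative log-likelihood at
    # every candidate k_agg and returns the candidate with the smallest nll.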
-    k_array = np.arange(*k_range)
     nll_array = np.zeros(len(k_array))
 
     for i in range(len(k_array)):

From 675f838427e1cf43eff8add67b4ed20d985d5ae3 Mon Sep 17 00:00:00 2001
From: Justin Kitzes
Date: Thu, 26 Jun 2014 14:32:01 -0700
Subject: [PATCH 314/343] O-ring refactoring

---
 macroeco/empirical/_empirical.py     | 49 ++++++++++-----------
 macroeco/empirical/test_empirical.py | 65 +++++++++++++++-------------
 macroeco/empirical/test_table1.csv   |  7 +--
 3 files changed, 61 insertions(+), 60 deletions(-)

diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py
index acfd212..a2fb520 100644
--- a/macroeco/empirical/_empirical.py
+++ b/macroeco/empirical/_empirical.py
@@ -682,26 +682,30 @@ def o_ring(patch, cols, splits, spp, bin_edges, density=True):
     Returns
     -------
     {1} Result has two columns, x and y, that give the distance to the center
-    of a torus and the number or density of individuals (possibly edge
-    corrected) found in that torus.
+    of a torus and the number or density of individuals found in that torus.
 
     Notes
     -----
-    If density is False, raw counts of individuals within a distance range,
-    without any edge correction, are returned.
+    If density is False, counts are raw counts, non-edge corrected, within
+    rings.
 
     Pairwise distances are directional, giving n(n-1) total distances for a
     species with n individuals, as edge correction is inherently directional.
 
-    If there are no records for a species in a split, entire result table will
-    be a dataframe with no records. If there are records but a species has only
-    one individual, dataframe will have zero count at all torus areas.
+    Bins include the lower edge and exclude the upper edge, except for the
+    final bin which includes both the lower and upper edge. Floating point
+    arithmetic may cause points located "exactly" on edges to be allocated
+    contrary to this rule, however.
+
+    If there are no records for a species, result table will be a dataframe
+    with no records. If there are records but a species has only one
+    individual, dataframe will have zero count at all torus areas.
 
     When using density, the maximum distance used for edge correction, given by
-    the mean of the last two bin_edge values, should be set to no greater than
-    one half the diagonal distance across the plot. This ensures that it is not
-    possible for an entire edge correction buffer to be outside of the plot,
-    which could lead to divide by zero errors.
+    the mean of the last two bin_edge values, should ideally be set to no
+    greater than one half the diagonal distance across the plot. This ensures
+    that it is not possible for an entire edge correction buffer to be outside
+    of the plot.
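The bin rule noted above matches numpy's histogram convention, which the o_ring implementation relies on; for example::

    >>> import numpy as np
    >>> np.histogram([0.1, 0.2, 0.3], bins=[0, 0.1, 0.2, 0.3])[0]
    array([0, 1, 2])

Here 0.1 and 0.2 fall in the bins whose lower edge they sit on, while 0.3 is kept by the final, upper-inclusive bin.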
{2} @@ -727,7 +731,7 @@ def o_ring(patch, cols, splits, spp, bin_edges, density=True): # Get table for just this species spp_table = subpatch.table[subpatch.table[spp_col] == spp] - # If spp not present or singleton, continue + # If spp not present, continue if (len(spp_table) == 0): result_list.append((substring, pd.DataFrame(columns=['x','y']))) continue @@ -767,31 +771,26 @@ def o_ring(patch, cols, splits, spp, bin_edges, density=True): other_dists = np.tile(other_dists, count) # Add 0's for count at this point to account for count here - # Multiplied by two to get directional pairwise dists - n_others_here = count - 1 - if n_others_here > 0: + if count > 1: other_dists = np.concatenate((other_dists, - np.zeros(n_others_here*2))) + np.zeros(count*(count-1)))) # Calculate histogram of distances to other points hist, _ = np.histogram(other_dists, bin_edges) # Convert histogram to density if desired corr_factor = np.ones(len(radii)) # Frac length in plot - for i, r in enumerate(radii): + for j, r in enumerate(radii): circ = geo.Point(*point).buffer(r, resolution=64) outside_len = circ.boundary.difference(plot_poly).length - corr_factor[i] = ((circ.boundary.length - outside_len) / + corr_factor[j] = ((circ.boundary.length - outside_len) / circ.boundary.length) - hist = hist / corr_factor # Edge corrected hist - - # If none of ring inside plot, ignore by setting hist and area 0 - hist[corr_factor == 0] = 0 - torus_areas[corr_factor == 0] = 0 - + # Add hist and corrected area for this point to running totals hists += hist - areas += torus_areas + areas += torus_areas * corr_factor * count + print radii + print i, torus_areas, corr_factor, areas # If density, divide summed torus counts by summed areas if density: diff --git a/macroeco/empirical/test_empirical.py b/macroeco/empirical/test_empirical.py index 610175c..f98868e 100644 --- a/macroeco/empirical/test_empirical.py +++ b/macroeco/empirical/test_empirical.py @@ -67,7 +67,7 @@ def test_simple(self): def test_simple_with_cols(self): # Specify count and spp_col here sad = emp.sad(self.pat1, self.cols1, None) - assert_equal(sad[0][1]['y'], [4,3]) + assert_equal(sad[0][1]['y'], [4,4]) def test_two_way_split(self): # Complete split generates 6 results @@ -84,19 +84,18 @@ def test_two_way_split(self): def test_one_way_uneven_split(self): # 0.2 should fall in second division of y sad = emp.sad(self.pat1, self.cols1, 'y:2') - print sad assert_equal(len(sad), 2) assert_equal(sad[0][1]['spp'].values, ['a']) assert_equal(sad[0][1]['y'].values, [2]) assert_equal(sad[1][1]['spp'].values, ['a','b']) - assert_equal(sad[1][1]['y'].values, [2,3]) + assert_equal(sad[1][1]['y'].values, [2,4]) def test_split_categorical(self): sad = emp.sad(self.pat1, self.cols1, 'year:split; x:2') assert_equal(sad[0][1]['y'].values, 3) assert_equal(sad[1][1]['y'].values, []) assert_equal(sad[2][1]['y'].values, [1,1]) - assert_equal(sad[3][1]['y'].values, [2]) + assert_equal(sad[3][1]['y'].values, [3]) def test_clean(self): # No a in second split on x @@ -113,12 +112,12 @@ def test_no_splits(self): # Just total abundance by species ssad = emp.ssad(self.pat1, self.cols1, None) assert_equal(ssad[0][1]['y'], [4]) - assert_equal(ssad[1][1]['y'], [3]) + assert_equal(ssad[1][1]['y'], [4]) def test_with_split(self): ssad = emp.ssad(self.pat1, self.cols1, 'x:2') assert_equal(ssad[0][1]['y'], [4,0]) # spp a - assert_equal(ssad[1][1]['y'], [1,2]) # spp b + assert_equal(ssad[1][1]['y'], [1,3]) # spp b class TestSAR(Patches): @@ -198,50 +197,58 @@ def 
test_x_y_division_uneven_y_jaccard(self): assert_equal(comm[0][1]['y'], [1/2., 0, 0, 0, 1/2., 0]) class TestORing(Patches): - # TODO: Individuals falling directly on a radius may be allocated - # ambiguously between adjacent toruses - # TODO: Main may fail with error if dataframe has no records when trying to # fit or make plot. - def test_missing_spp_returns_df_with_no_records(self): - o_ring = emp.o_ring(self.pat1, self.cols1, '', 'nothere', [0,.11,.2]) + def test_spp_no_present_returns_empty_df(self): + o_ring = emp.o_ring(self.pat1, self.cols1, '', 'nothere', [0,.1,.2]) assert_frame_equal(o_ring[0][1], pd.DataFrame(columns=['x','y'])) def test_one_individual_returns_zeros(self): self.pat1.table = self.pat1.table[2:4] # Leave 1 'a' and 1 'b' - o_ring = emp.o_ring(self.pat1, self.cols1, '', 'a', [0,.11,.2]) + o_ring = emp.o_ring(self.pat1, self.cols1, '', 'a', [0,.1,.2]) assert_equal(o_ring[0][1]['y'], [0, 0]) - def test_simple_count_no_density_a(self): - o_ring = emp.o_ring(self.pat1, self.cols1, '', 'a', [0,.11,.2], + def test_no_density_a(self): + # Points on bin edge may be allocated ambiguously due to floating point + # issues - testing here with slightly offset edges + o_ring = emp.o_ring(self.pat1, self.cols1, '', 'a', [0,.101,.201,.301], density=False) - assert_almost_equal(o_ring[0][1]['x'], [0.055, 0.155]) - assert_almost_equal(o_ring[0][1]['y'], [8, 4]) + assert_almost_equal(o_ring[0][1]['x'], [0.0505, 0.151, 0.251]) + assert_almost_equal(o_ring[0][1]['y'], [8, 4, 0]) - def test_simple_count_no_density_b(self): - o_ring = emp.o_ring(self.pat1, self.cols1, '', 'b', [0,.11,.2], + def test_no_density_b(self): + o_ring = emp.o_ring(self.pat1, self.cols1, '', 'b', [0,.1,.2,.3], density=False) - assert_almost_equal(o_ring[0][1]['x'], [0.055, 0.155]) - assert_almost_equal(o_ring[0][1]['y'], [2, 4]) + assert_almost_equal(o_ring[0][1]['x'], [0.05, 0.15,0.25]) + assert_almost_equal(o_ring[0][1]['y'], [6, 6, 0]) - def test_simple_count_with_split_a(self): - o_ring = emp.o_ring(self.pat1, self.cols1, 'y:2', 'a', [0,.11,.2], + def test_with_split_a(self): + o_ring = emp.o_ring(self.pat1, self.cols1, 'y:2', 'a', [0,.1,.2], density=False) assert_equal(o_ring[0][1]['y'], [2, 0]) # Bottom assert_equal(o_ring[1][1]['y'], [2, 0]) # Top - def test_simple_count_with_split_b(self): - o_ring = emp.o_ring(self.pat1, self.cols1, 'y:2', 'b', [0,.11,.2], + def test_with_split_b(self): + o_ring = emp.o_ring(self.pat1, self.cols1, 'y:2', 'b', [0,.1,.2], density=False) assert_equal(o_ring[0][1]['y'], []) # Bottom - assert_equal(o_ring[1][1]['y'], [2, 4]) # Top + assert_equal(o_ring[1][1]['y'], [6, 6]) # Top def test_density_a(self): - o_ring = emp.o_ring(self.pat1, self.cols1, '', 'b', [0,.05,.1]) - assert_array_almost_equal(o_ring[0][1]['y'], [1358.12218105,0]) - - # TODO: More checks of density (which inclues edge correction) + # First radius is 0.05 + o_ring = emp.o_ring(self.pat1, self.cols1, '', 'a', [0,.10000001]) + assert_array_almost_equal(o_ring[0][1]['y'], + [8 / (1.25*np.pi*(0.1)**2)], + 3) + + def test_density_b(self): + # First radius is 0.05 + o_ring = emp.o_ring(self.pat1, self.cols1, '', 'b', [0,.10000001,.1828427]) + assert_array_almost_equal(o_ring[0][1]['y'], + [6 / (1.25*np.pi*(0.1)**2), + 6 / (3/8 * np.pi*(0.1828427**2 - 0.1**2))], + 3) class TestProduct(): diff --git a/macroeco/empirical/test_table1.csv b/macroeco/empirical/test_table1.csv index 60a9107..a019594 100644 --- a/macroeco/empirical/test_table1.csv +++ b/macroeco/empirical/test_table1.csv @@ -1,6 +1 @@ -spp,x,y,count,year 
-a,.1,.1,2,2000
-a,.1,.2,1,2000
-a,.1,.3,1,2010
-b,.1,.2,1,2010
-b,.2,.3,2,2010
\ No newline at end of file
+spp,x,y,count,year a,0.1,0.1,2,2000 a,0.1,0.2,1,2000 a,0.1,0.3,1,2010 b,0.1,0.2,1,2010 b,0.2,0.3,3,2010
\ No newline at end of file

From 461b648f9162df5246e8a834dca8676c8ff4169b Mon Sep 17 00:00:00 2001
From: Justin Kitzes
Date: Fri, 29 Aug 2014 14:15:36 -0700
Subject: [PATCH 315/343] Remove accidental print statements

---
 macroeco/empirical/_empirical.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py
index a2fb520..9774ad3 100644
--- a/macroeco/empirical/_empirical.py
+++ b/macroeco/empirical/_empirical.py
@@ -789,8 +789,6 @@ def o_ring(patch, cols, splits, spp, bin_edges, density=True):
         # Add hist and corrected area for this point to running totals
         hists += hist
         areas += torus_areas * corr_factor * count
-        print radii
-        print i, torus_areas, corr_factor, areas
 
     # If density, divide summed torus counts by summed areas
     if density:

From 45b3094c6efa8fd00326d7320de94831b71ae8dd Mon Sep 17 00:00:00 2001
From: Justin Kitzes
Date: Fri, 29 Aug 2014 14:17:20 -0700
Subject: [PATCH 316/343] Add logser from scipy

---
 macroeco/models/__init__.py       |  3 +-
 macroeco/models/_distributions.py | 55 ++++++++++++++++++++++++++++++-
 2 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py
index d99c08d..ee9fd8e 100644
--- a/macroeco/models/__init__.py
+++ b/macroeco/models/__init__.py
@@ -29,6 +29,7 @@
     geom_uptrunc
     nbinom
     cnbinom
+    logser
     logser_uptrunc
     plnorm
     plnorm_ztrunc
@@ -59,7 +60,7 @@
 """
 
 from _distributions import (geom, geom_uptrunc, nbinom, cnbinom,
-                            logser_uptrunc, plnorm, plnorm_ztrunc,
+                            logser, logser_uptrunc, plnorm, plnorm_ztrunc,
                             expon, expon_uptrunc, lognorm)
 
 from ._curves import (power_law,
diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py
index 7a7772d..ef733c1 100644
--- a/macroeco/models/_distributions.py
+++ b/macroeco/models/_distributions.py
@@ -600,6 +600,60 @@ def _solve_k_from_mu(data, k_array, nll, *args):
     return k_array[min_nll_idx]
 
 
+class logser_gen(rv_discrete_meco):
+    """
+    A Logarithmic (Log-Series, Series) discrete random variable.
+
+    Notes
+    -----
+    The probability mass function for `logser` is::
+
+        logser.pmf(k) = - p**k / (k*log(1-p))
+
+    for ``k >= 1``.
+
+    `logser` takes ``p`` as shape parameter.
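A quick numerical check of this form (illustrative only; the support is truncated at a large value)::

    >>> import numpy as np
    >>> p = 0.9
    >>> k = np.arange(1, 1000)
    >>> np.allclose(np.sum(-p**k / (k * np.log(1 - p))), 1)
    True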
+ + """ + + @inherit_docstring_from(rv_continuous_meco) + def translate_args(self, mu): + eq = lambda p, mu: -p/np.log(1-p)/(1-p) - mu + return optim.brentq(eq, 1e-16, 1-1e-16, args=(mu), disp=True) + + @inherit_docstring_from(rv_continuous_meco) + def fit_mle(self, data): + # Use method of moments + return self.translate_args(np.mean(data)), + + def _rvs(self, p): + # looks wrong for p>0.5, too few k=1 + # trying to use generic is worse, no k=1 at all + return mtrand.logseries(p, size=self._size) + + def _argcheck(self, p): + return (p > 0) & (p < 1) + + def _pmf(self, x, p): + return -np.power(p, x) * 1.0 / x / np.log(1 - p) + + def _stats(self, p): + r = log(1 - p) + mu = p / (p - 1.0) / r + mu2p = -p / r / (p - 1.0)**2 + var = mu2p - mu*mu + mu3p = -p / r * (1.0+p) / (1.0 - p)**3 + mu3 = mu3p - 3*mu*mu2p + 2*mu**3 + g1 = mu3 / np.power(var, 1.5) + + mu4p = -p / r * ( + 1.0 / (p-1)**2 - 6*p / (p - 1)**3 + 6*p*p / (p-1)**4) + mu4 = mu4p - 4*mu3p*mu + 6*mu2p*mu*mu - 3*mu**4 + g2 = mu4 / var**2 - 3.0 + return mu, var, g1, g2 + +logser = logser_gen(name="logser", shapes="p") + class logser_uptrunc_gen(rv_discrete_meco): r""" @@ -638,7 +692,6 @@ class logser_uptrunc_gen(rv_discrete_meco): Abundance, Distribution, and Energetics. Oxford, United Kingdom: Oxford University Press. - """ @inherit_docstring_from(rv_discrete_meco) From ac405de7b06bdc7cc7614e1c7acd8fded253b476 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Fri, 29 Aug 2014 14:19:46 -0700 Subject: [PATCH 317/343] Update gitignore to account for MacroecoDesktop build files --- .gitignore | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 68f40e4..bdd4820 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,9 @@ *generated* -*/build/* */_build/* -*/dist/* +build/* +dist/* +demo/* +_private/* *.swp *.pyc *.DS_Store From 21a3ca3a0bf72b92ac5f3c38c9ec602b948642d6 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 3 Sep 2014 08:44:09 -0700 Subject: [PATCH 318/343] Lognorm logser_uptrunc rvs fixed. 
Issue #85 --- macroeco/models/_distributions.py | 5 ++++- macroeco/models/test_distributions.py | 19 ++++++++++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 4a3662a..8f08cf4 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -852,6 +852,9 @@ def _cdf(self, x, p, b): def _rvs(self, p, b): # Code from weecology/macroecotools + if not self._size: + self._size = 1 + out = [] if p < 1: for i in range(self._size): @@ -1361,7 +1364,7 @@ def _argcheck(self, mu, sigma): return True def _rvs(self, mu, sigma): - return stats.lognorm.rvs(sigma, scale=np.exp(mu)) + return stats.lognorm.rvs(sigma, scale=np.exp(mu), size=self._size) def _pdf(self, x, mu, sigma): return stats.lognorm.pdf(x, sigma, scale=np.exp(mu)) diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index 67e6003..d2c0f47 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -310,7 +310,6 @@ def test_translate_args(self): lg = logser_uptrunc.translate_args(20 / 20, 20)[0] assert_equal(0, 0) - def test_n_close_to_s(self): # Test the solver doesn't fail when N is very close to S @@ -327,6 +326,15 @@ def test_rank(self): assert_array_equal(exp_vals, test_vals) + def test_rvs(self): + + # Make sure random number generator is returning what is expected + res1 = logser_uptrunc.rvs(.9, 100) + assert_equal(1, len(np.atleast_1d(res1))) + + res2 = lognorm.rvs(.9, 100, size=5) # Should be length 5 + assert_equal(5, len(res2)) + class TestLognorm(TestCase): @@ -397,6 +405,15 @@ def test_fit_mle(self): test1 = lognorm.fit_mle(data1)[1] assert_almost_equal(scipy_ans, test1) + def test_rvs(self): + + # Test that multiple random numbers can be returned without error + res1 = lognorm.rvs(5, 5) # Should be length 1 + assert_equal(1, len(np.atleast_1d(res1))) + + res2 = lognorm.rvs(5, 5, size=5) # Should be length 5 + assert_equal(5, len(res2)) + class TestPlnorm(TestCase): From 9da371f76a09ffe264d6a565ecf55ca658fc4406 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 3 Sep 2014 11:53:58 -0700 Subject: [PATCH 319/343] Added alternative way to compute random variables --- macroeco/models/_distributions.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 8f08cf4..2d505e0 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -206,6 +206,22 @@ def rank(self, n, *args): """{0}""" return self.ppf((np.arange(1, n+1) - 0.5) / n, *args) + @doc_sub(_doc_rvs_alt) + def rvs_alt(self, *args, **kwargs): + """{0}""" + l = kwargs.get('l', 1) + b = kwargs.get('b', 1e5) + size = kwargs.get('size', 1) + + model_cdf = self.cdf(np.arange(l, b + 1), *args) + + unif_rands = np.random.random(size) + model_rands = np.array([np.where(tx <= model_cdf)[0][0] + l + for tx in unif_rands]) + + return model_rands + + # # Discrete # From 39edd5dede73a4b59b56fa3a9ed68e12b154d34c Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 3 Sep 2014 11:56:40 -0700 Subject: [PATCH 320/343] Test and docstring for alt_rvs --- macroeco/models/_distributions.py | 28 +++++++++++++++++++++++++++ macroeco/models/test_distributions.py | 11 ++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 2d505e0..bfee1e5 100644 --- a/macroeco/models/_distributions.py +++ 
b/macroeco/models/_distributions.py @@ -72,6 +72,34 @@ ----- """ +_doc_rvs_alt = \ +""" +Alternative random number generator for discrete distributions. Uses the +model's cdf function and a uniform random number generator. Can be faster than +native scipy rvs for some custom models. Will perform well if the the models +cdf function is also fast. + +Parameters +---------- +%(shapes)s : array_like + shape parameters +l : int + Lower bound of distribution (Either 0 or 1). Default is 1 +b : int + Upper bound of distribution for computational purposes, even if + distribution technically has infinite support. Default is 1e5. +size : int + Number of random variables to draw. Default is 1. + +Returns +------- +array + Random variables from model + +Notes +----- +""" + _doc_fit_mle = \ """ Return MLEs for shape parameters from data diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index d2c0f47..6a86866 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -140,9 +140,18 @@ def test_fit_mle_with_R(self): def test_fit_mle_with_manual_calc(self): x = np.array([6,17,14,12,8,10,4,9,3,12,4,2,12,8,14,16,9,10,8,5,6]) - mu, k = nbinom.fit_mle(x, k_range=(0.01,10,0.01)) + mu, k = nbinom.fit_mle(x, k_array=np.arange(0.01,10,0.01)) assert_array_almost_equal([mu, k], [9, 8.54], decimal=2) + def test_alternative_rvs(self): + rand_alt = nbinom.rvs_alt(5, 1, l=0, size=10000) + rand = nbinom.rvs(5, 1, size=10000) + + alt_k = nbinom.fit_mle(rand_alt, k_array=np.arange(0.5, 1.5, 0.01)) + k = nbinom.fit_mle(rand, k_array=np.arange(0.5, 1.5, 0.01)) + + assert_almost_equal(alt_k, k, decimal=1) + class TestCnbinom(TestCase): def test_pmf(self): From e9011fba58ed54ac700ed91b7197715346664f53 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 3 Sep 2014 11:57:41 -0700 Subject: [PATCH 321/343] Tested logseries and changed to scipy implementation. Issue #83 --- macroeco/models/_distributions.py | 9 ++++- macroeco/models/test_distributions.py | 54 +++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index bfee1e5..804ff01 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -781,13 +781,18 @@ def fit_mle(self, data): def _rvs(self, p): # looks wrong for p>0.5, too few k=1 # trying to use generic is worse, no k=1 at all - return mtrand.logseries(p, size=self._size) + return stats.logser.rvs(p, size=self._size) + #return np.random.mtrand.logseries(p, size=self._size) def _argcheck(self, p): return (p > 0) & (p < 1) def _pmf(self, x, p): - return -np.power(p, x) * 1.0 / x / np.log(1 - p) + return stats.logser.pmf(x, p) + # return -np.power(p, x) * 1.0 / x / np.log(1 - p) + + def _cdf(self, x, p): + return stats.logser.cdf(x, p) def _stats(self, p): r = log(1 - p) diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index 6a86866..bd7a946 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -253,6 +253,60 @@ def test_rank(self): assert_array_equal(logseries_rank, dgamma_rank) +class TestLogser(TestCase): + + def test_pmf(self): + + # Testing against values in Williams 1944, + # Some applications of the logarithmic series and the index of + # diversity to ecological problems, pg. 18. 
+ + # Acridiidae: S = 826, p = 0.92964 (There seems to be an error in + # their data at 3 -> should be 83.3 not 88.3) + test_vals = np.array([289.3, 134.5, 83.3, 58.1, 43.2, 33.5, 26.7, 21.7, + 17.9, 15., 12.7, 10.8, 9.3, 8., 6.9, 6.1, 5.3, 4.6, 4.1, 3.6]) + + pred_pmf = logser.pmf(np.arange(1, 21), 0.92964) + pred_vals = np.round(pred_pmf * 826, decimals=1) + assert_array_equal(test_vals, pred_vals) + + # Mantidae: S = 209, p = 0.89781 + test_vals = np.array([82.3, 36.9, 22.1, 14.9, 10.7, 8., 6.2, 4.8, 3.9, + 3.1, 2.5, 2.1, 1.7, 1.4, 1.2, 1., 0.9, 0.7, 0.6, 0.5]) + + pred_pmf = logser.pmf(np.arange(1, 21), 0.89781) + pred_vals = np.round(pred_pmf * 209, decimals=1) + assert_array_equal(test_vals, pred_vals) + + # Blattidae: S = 197, p = 0.96476 + test_vals = np.array([56.8, 27.4, 17.6, 12.8, 9.8, 7.9, 6.5, 5.5, 4.7, + 4.1, 3.6, 3.2, 2.8, 2.5, 2.3, 2.1, 1.9, 1.7, + 1.6, 1.4, 1.3, 1.2, 1.1, 1., 1., 0.9, 0.8, + 0.8, 0.7, 0.7]) + + pred_pmf = logser.pmf(np.arange(1, 31), 0.96476) + pred_vals = np.round(pred_pmf * 197, decimals=1) + assert_array_equal(test_vals, pred_vals) + + def test_translate_args(self): + + # Using values from Williams 1994 + test_vals = [0.92964, 0.89781, 0.96476, 0.97003] + data = [4112 / 826., 805. / 209, 1612. / 197, 480. / 52] + + pred_vals = [logser.translate_args(td) for td in data] + + assert_array_almost_equal(test_vals, pred_vals, decimal=5) + + def test_fit_mle(self): + + test_val = .97003 # Value from Williams 1944 + x = np.arange(1, 53.) + norm_x = x / sum(x) + data = norm_x * (480) + pred_val = logser.fit_mle(data) + assert_almost_equal(test_val, pred_val, decimal=5) + class TestLogserUptrunc(TestCase): From c10c1a447d263823090491daedb13ad3cb09c252 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 10 Sep 2014 17:08:42 -0700 Subject: [PATCH 322/343] Added truncated nbd. Still need to test --- macroeco/models/__init__.py | 7 ++-- macroeco/models/_distributions.py | 65 ++++++++++++++++++++++++++++++- 2 files changed, 68 insertions(+), 4 deletions(-) diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py index cd71aec..34c21f7 100644 --- a/macroeco/models/__init__.py +++ b/macroeco/models/__init__.py @@ -59,9 +59,10 @@ """ -from _distributions import (geom, geom_uptrunc, nbinom, cnbinom, - logser, logser_uptrunc, plnorm, plnorm_ztrunc, - expon, expon_uptrunc, lognorm, dgamma) +from _distributions import (geom, geom_uptrunc, nbinom, nbinom_ztrunc, + cnbinom, logser, logser_uptrunc, plnorm, + plnorm_ztrunc, expon, expon_uptrunc, lognorm, + dgamma) from ._curves import (power_law, mete_sar, mete_iterative_sar, diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 804ff01..73bb27f 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -628,6 +628,69 @@ def nbinom_nll(data, k_agg, mu): return -np.sum(nbinom._logpmf(data, mu, k_agg)) +class nbinom_ztrunc_gen(rv_discrete_meco): + r""" + The zero-truncated negative binomial random variable + + This distribution is described by Sampford (1955) [#]_ + + .. 
math:: + + p(x) = \frac{\binom{x + k - 1}{x} \binom{b - x + k/a - k -1}{b + -x}}{\binom{b + k/a - 1}{b}} + + + + + """ + + @inherit_docstring_from(rv_discrete_meco) + def translate_args(self, mu, k_agg): + return mu, k_agg + + @inherit_docstring_from(rv_discrete_meco) + def fit_mle(self, data): + raise NotImplementedError("Method not yet implemented") + + def _pmf(self, x, mu, k_agg): + + norm = np.exp(special.gammaln(k_agg + x) - ((special.gammaln(k_agg) + + special.gammaln(x + 1)))) + p = nbinom_ztrunc_p(mu, k_agg) + kernel = (p / (1 + p))**x * (1 / ((1 + p)**k_agg - 1)) + + return norm * kernel + + def _stats(self, mu, k_agg): + p = nbinom_ztrunc_p(mu, k_agg) + omega = 1 / (1 + p) + eta = 1 - omega + mu = mu + var = (k_agg * eta * (1 + k_agg * eta)) / \ + (omega**2 * (1 - omega**k_agg)) - mu**2 + return mu, var, None, None + +nbinom_ztrunc = nbinom_ztrunc_gen(name='nbinom_ztrunc', shapes='mu, k_agg') + + +def _nbinom_ztrunc_p(mu, k_agg): + """ Calculates p parameter for truncated negative binomial + + Function given in Sampford 1955, equation 4 + + Note that omega = 1 / 1 + p in Samford + """ + + p_eq = lambda p, mu, k_agg: (k_agg * p) / (1 - (1 + p)**-k_agg) - mu + + # The upper bound needs to be large. p will increase with increasing mu + # and decreasing k_agg + p = optim.brentq(p_eq, 1e-10, 1e10, args=(mu, k_agg)) + return p + +nbinom_ztrunc_p = np.vectorize(_nbinom_ztrunc_p) + + class cnbinom_gen(rv_discrete_meco): r""" The conditional negative binomial random variable. @@ -880,7 +943,7 @@ def _pmf(self, x, p, b): pmf = stats.logser.pmf(x, p) / stats.logser.cdf(b, p) else: ivals = np.arange(1, b[0] + 1) - normalization = sum(p[0] ** ivals / ivals) + normalization = np.sum(p[0] ** ivals / ivals) pmf = (p[0] ** x / x) / normalization return pmf From 00bd0e3c130e8783173a109a5d8abb746b3e86f6 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Mon, 15 Sep 2014 11:18:10 -0700 Subject: [PATCH 323/343] Added tnbd fit_mle method --- macroeco/models/_distributions.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 73bb27f..5b0f44e 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -634,14 +634,14 @@ class nbinom_ztrunc_gen(rv_discrete_meco): This distribution is described by Sampford (1955) [#]_ + Wrong math + .. 
math:: p(x) = \frac{\binom{x + k - 1}{x} \binom{b - x + k/a - k -1}{b -x}}{\binom{b + k/a - 1}{b}} - - """ @inherit_docstring_from(rv_discrete_meco) @@ -649,17 +649,32 @@ def translate_args(self, mu, k_agg): return mu, k_agg @inherit_docstring_from(rv_discrete_meco) - def fit_mle(self, data): - raise NotImplementedError("Method not yet implemented") + def fit_mle(self, data, k_agg0=0.5): + + mu = np.mean(data) + + def mle(k): + + p = nbinom_ztrunc_p(mu, k) + return -np.sum(np.log(self.pmf(data, p, k))) + + k = optim.fmin(mle, x0=k_agg0, disp=0) + + return mu, k[0] def _pmf(self, x, mu, k_agg): + x = np.atleast_1d(x) + norm = np.exp(special.gammaln(k_agg + x) - ((special.gammaln(k_agg) + special.gammaln(x + 1)))) p = nbinom_ztrunc_p(mu, k_agg) kernel = (p / (1 + p))**x * (1 / ((1 + p)**k_agg - 1)) + pmf = norm * kernel + + pmf[x == 0] = 0 - return norm * kernel + return pmf def _stats(self, mu, k_agg): p = nbinom_ztrunc_p(mu, k_agg) @@ -678,7 +693,7 @@ def _nbinom_ztrunc_p(mu, k_agg): Function given in Sampford 1955, equation 4 - Note that omega = 1 / 1 + p in Samford + Note that omega = 1 / 1 + p in Sampford """ p_eq = lambda p, mu, k_agg: (k_agg * p) / (1 - (1 + p)**-k_agg) - mu From 7ea6a950a08a170df3fb54b0039361fc460fe758 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Mon, 15 Sep 2014 21:19:11 -0700 Subject: [PATCH 324/343] Added dgamma and nbinom_ztrunc to docs --- macroeco/models/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py index 34c21f7..d019cd0 100644 --- a/macroeco/models/__init__.py +++ b/macroeco/models/__init__.py @@ -28,11 +28,13 @@ geom geom_uptrunc nbinom + nbinom_ztrunc cnbinom logser logser_uptrunc plnorm plnorm_ztrunc + dgamma The following continuous distributions are available. From 703bf67a5de9d1f7ce632ad92a8a40e864ddd1a8 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Mon, 15 Sep 2014 21:20:16 -0700 Subject: [PATCH 325/343] Added docstrings to nbinom_ztrunc. Finished fit_mle --- macroeco/models/_distributions.py | 64 ++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 14 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 5b0f44e..373ced4 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -424,13 +424,13 @@ def p_eq(x, mu, b): class dgamma_gen(rv_discrete_meco): r""" - A discrete gamma random variable + A discrete gamma random variable. .. math:: - P(x) = k * x^(\alpha - 1) * e^{(-1 / \theta)*x} + P(x) = k * x^{(\alpha - 1)} * e^{(-1 / \theta)*x} - for ``x >= 1``, ``\alpha > 0`` and ``\theta > 0``. + for ``x >= 1``, ``\theta > 0``. ``k`` is the normalizing constant. Methods @@ -541,7 +541,7 @@ class nbinom_gen(rv_discrete_meco): p(x) = \frac{\gamma (k + x)}{\gamma(k) x!} \left(\frac{k}{k+\mu}\right)^k \left(\frac{\mu}{k+\mu}\right)^x - for ``x >= 0``. in the traditional parameterization, ``n = k_agg`` (the + for ``x >= 0``. In the traditional parameterization, ``n = k_agg`` (the size parameter) and ``p = k_agg / (k_agg + mu)``. the ``loc`` parameter is not used. @@ -630,33 +630,69 @@ def nbinom_nll(data, k_agg, mu): class nbinom_ztrunc_gen(rv_discrete_meco): r""" - The zero-truncated negative binomial random variable + The zero-truncated negative binomial random variable. - This distribution is described by Sampford (1955) [#]_ - - Wrong math + This distribution is described by Sampford (1955) [#]_. .. 
math:: - p(x) = \frac{\binom{x + k - 1}{x} \binom{b - x + k/a - k -1}{b - -x}}{\binom{b + k/a - 1}{b}} + p(x) = \frac{(k + x - 1)!}{(k - 1)!x!} \left(\frac{p} + {1 + p}\right)^{x} \frac{1}{(1 + p)^{k - 1}} + + for ``x >= 1``. ``p`` can be computed directly from the mean of the + distribution and is calculated internally so that the distribution is + parameterized by ``\mu`` and ``k_agg`` analogous to ``nbinom``. + + Methods + ------- + translate_args(mu, k_agg, return_p=False) + Returns mu and k_agg. Returns p parameter if return_p is True. + fit_mle(data, k_agg0=0.5) + ml estimate of shape parameters mu and k_agg given data + %(before_notes)s + mu : float + distribution mean + k_agg : float + clustering parameter + + Notes + ----- + + References + ---------- + .. [#] + Sampford, M. R. (1955). The truncated negative binomial distribution. + Biometrika, 42(1), 58-69 """ @inherit_docstring_from(rv_discrete_meco) - def translate_args(self, mu, k_agg): - return mu, k_agg + def translate_args(self, mu, k_agg, return_p=False): + """%(super)s + + The keyword argument return_p computes the p values used to define the + the truncated negative binomial + """ + if return_p: + return nbinom_ztrunc_p(mu, k_agg), k_agg + else: + return mu, k_agg @inherit_docstring_from(rv_discrete_meco) def fit_mle(self, data, k_agg0=0.5): + """%(super)s + + In addition to data, gives an optional keyword argument k_agg0 that + specifies the initial value of k_agg used in the optimization. + + """ mu = np.mean(data) def mle(k): - p = nbinom_ztrunc_p(mu, k) - return -np.sum(np.log(self.pmf(data, p, k))) + return -np.sum(np.log(self.pmf(data, mu, k))) k = optim.fmin(mle, x0=k_agg0, disp=0) From 41019c7c3ecb83d2217e3f1d1d6183d4baeefe56 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Mon, 15 Sep 2014 21:20:34 -0700 Subject: [PATCH 326/343] Unittested nbinom_ztrunc --- macroeco/models/test_distributions.py | 47 +++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/macroeco/models/test_distributions.py b/macroeco/models/test_distributions.py index bd7a946..f518bdd 100644 --- a/macroeco/models/test_distributions.py +++ b/macroeco/models/test_distributions.py @@ -152,6 +152,53 @@ def test_alternative_rvs(self): assert_almost_equal(alt_k, k, decimal=1) + +class TestNbinom_ztrunc(TestCase): + + def test_pmf(self): + # Test pmf gives back expected mean + tpmf = nbinom_ztrunc.pmf(np.arange(1, 500), 4, 1) + tmean = np.sum(np.arange(1, 500) * tpmf) + assert_almost_equal(tmean, 4) + + # Test pmf of 0 is 0 + tpmf = nbinom_ztrunc.pmf(0, 1, 1) + assert_equal(tpmf, 0) + + def test_cdf(self): + + # Test cdf and pmf agree! 
+ tpmf = np.sum(nbinom_ztrunc.pmf(np.arange(1, 20), 20, 10)) + tcdf = nbinom_ztrunc.cdf(19, 20, 10) + assert_equal(tpmf, tcdf) + + def test_get_p_from_mu(self): + + # Test the fit p values are equal to those given in He and Legendre + # 2002 + test_values = [205.9878, 410.9853, 794.7613, 1210.0497, + 1945.9970, 3193.8362] + test_ks = [2, 1, 0.5, 0.3, 0.1363, 0.01] + + ps = np.array([nbinom_ztrunc.translate_args(335356 / 814., tk, + return_p=True)[0] for tk in test_ks]) + + assert_array_almost_equal(ps, test_values, decimal=0) + + def test_fit_mle(self): + + # Test fit returns something close the input + rvs_data = nbinom_ztrunc(10, 1).rvs(size=1000) + ml_mean, ml_k = nbinom_ztrunc.fit_mle(rvs_data) + assert_almost_equal(ml_mean, np.mean(rvs_data)) + assert_almost_equal(ml_k, 1, decimal=0) + + rvs_data = nbinom_ztrunc(20, 10).rvs(size=1000) + ml_mean, ml_k = nbinom_ztrunc.fit_mle(rvs_data) + assert_almost_equal(ml_mean, np.mean(rvs_data)) + assert_almost_equal(ml_k, 10, decimal=0) + + class TestCnbinom(TestCase): def test_pmf(self): From c3bd4a402019c2a1bb8596f58f92d27ec503a582 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Mon, 15 Sep 2014 21:22:47 -0700 Subject: [PATCH 327/343] Added Sampford reference --- macroeco/models/_distributions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index 373ced4..f6c8e2b 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -717,6 +717,8 @@ def _stats(self, mu, k_agg): omega = 1 / (1 + p) eta = 1 - omega mu = mu + + # From Sampford 1955 var = (k_agg * eta * (1 + k_agg * eta)) / \ (omega**2 * (1 - omega**k_agg)) - mu**2 return mu, var, None, None From 6c2beef92c2bd89f6d8b985b9496d11a91645aa5 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 16 Sep 2014 09:45:56 -0700 Subject: [PATCH 328/343] Dropped energy functions from empirical --- macroeco/empirical/_empirical.py | 227 ------------------------------- 1 file changed, 227 deletions(-) diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py index 9774ad3..8dcca39 100644 --- a/macroeco/empirical/_empirical.py +++ b/macroeco/empirical/_empirical.py @@ -928,233 +928,6 @@ def comm_sep(self, plot_locs, criteria, loc_unit=None): return result - - - -def ied(self, criteria, normalize=True, exponent=0.75): - ''' - Calculates the individual energy distribution for the entire community - given the criteria - - Parameters - ---------- - criteria : dict - Dictionary must have contain a key with the value 'energy'. See - sad method for further requirements. - normalize : bool - If True, this distribution is normalized by dividing by the lowest - energy value within each element of criteria. If False, returns raw - energy values. - exponent : float - The exponent of the allometric scaling relationship if energy is - calculated from mass. - - Returns - ------- - result : list - List of tuples containing results, where first element is - dictionary of criteria for this calculation and second element is a - 1D ndarray containing the energy measurement of each individual in - the subset. The third element is the full (not unique) species - list for the given criteria. - - Notes - ----- - If count_col is None or is all ones, the entire energy column for each - subtable is returned. Else, the average energy per individual, - repeated for each individual is returned. This is equivalent to the psi - distribution from Harte (2011). 
- - - ''' - - spp_list, spp_col, count_col, engy_col, mass_col, combinations = \ - self.parse_criteria(criteria) - - if engy_col == None and mass_col == None: - raise ValueError("No energy or mass column given") - elif engy_col == None and mass_col != None: - mass = True - this_engy = mass_col - else: - mass = False - this_engy = engy_col - - result = [] - for comb in combinations: - - subtable = self.data_table.get_subtable(comb) - - # If all counts are not 1 - if count_col and (not np.all(subtable[count_col] == 1)): - - # Remove any zero counts - subtable = subtable[subtable[count_col] != 0] - # Convert counts to ints - temp_counts = subtable[count_col].astype(int) - - energy = np.repeat((subtable[this_engy] / - subtable[count_col]), temp_counts) - species = np.repeat(subtable[spp_col], temp_counts) - else: - energy = subtable[this_engy] - species = subtable[spp_col] - - # Convert mass to energy if mass is True - if mass: - energy = (energy ** exponent) - - # Normalizing energy - if normalize: - energy = energy / np.min(energy) - result.append((comb, energy, species)) - - return result - -def sed(self, criteria, normalize=True, exponent=0.75, clean=False): - ''' - Calculates the species-level energy distribution for each given species - in the community. - - Parameters - ---------- - criteria : dict - Dictionary must have contain a key with the value 'energy' or - 'mass'. See sad method for further requirements. - normalize : bool - If True, this distribution is normalized by dividing by the lowest - energy value within each element of criteria. If False, returns raw - energy values. - exponent : float - The exponent of the allometric scaling relationship if energy is - calculated from mass - clean : bool - If False, sed dictionary contains all species. If True, species - with no individuals are removed. This is useful when subsetting. - - Returns - ------- - result : list of tuples - Each tuple contains two objects. The first object is a dict with - the division specifications that generated the given species energy - distributions. The second object is a dict with a keyword - corresponding to each species in the spp_list. Each species - keyword looks up a np.array that contains the given species - energy distribution. - - Notes - ----- - The theta distribution from Harte (2011) is a an sed. - - ''' - spp_list, spp_col, count_col, engy_col, mass_col, combinations = \ - self.parse_criteria(criteria) - - ied = self.ied(criteria, normalize=normalize, exponent=exponent) - - result = [] - for this_ied in ied: - this_criteria_sed = {} - - for spp in spp_list: - spp_ind = (spp == this_ied[2]) - this_spp_sed = this_ied[1][spp_ind] - - if clean: # If True, don't add empty species lists - if len(this_spp_sed) > 0: - this_criteria_sed[spp] = this_spp_sed - else: - this_criteria_sed[spp] = this_spp_sed - - result.append((this_ied[0], this_criteria_sed)) - - return result - -def ased(self, criteria, normalize=True, exponent=0.75): - ''' - Calculates the average species energy distribution for each given - species in a subset. - - Parameters - ---------- - criteria : dict - Dictionary must have contain a key with the value 'energy' or - 'mass'. See sad method for further requirements. - - Returns - ------- - result : list - List of tuples containing results, where the first element is a - dictionary of criteria for this calculation and second element is a - 1D ndarray of length species containing the average energy for each - species. 
The third element is 1D array listing identifiers for - species in the same order as they appear in the second element of - result. - - Notes - ----- - This is equivalent to the nu distribution from Harte 2011 - - ''' - - sed = self.sed(criteria, normalize=normalize, exponent=exponent) - - result = [] - for this_sed in sed: - spp_list = list(this_sed[1].viewkeys()) - spp_list.sort() - - # Take the mean energy for each species - nu = [np.mean(this_sed[1][spp]) for spp in spp_list if - len(this_sed[1][spp]) != 0] - # Truncated spp_list if necessary - spp_list = [spp for spp in spp_list if len(this_sed[1][spp]) != 0] - - result.append((this_sed[0], np.array(nu), np.array(spp_list))) - - return result - -def tsed(self, criteria, normalize=True, exponent=0.75): - ''' - Calculates the total species energy distribution for each given - species in a subset. - - Parameters - ---------- - criteria : dict - Dictionary must have contain a key with the value 'energy' or - 'mass'. See sad method for further requirements. - - Returns - ------- - result : list - List of tuples containing results, where the first element is a - dictionary of criteria for this calculation and second element is a - 1D ndarray of length species containing the average energy for each - species. The third element is 1D array listing identifiers for - species in the same order as they appear in the second element of - result. - - ''' - - sed = self.sed(criteria, normalize=normalize, exponent=exponent) - - result = [] - for this_sed in sed: - spp_list = list(this_sed[1].viewkeys()) - spp_list.sort() - - # Take the mean energy for each species - omega = [np.sum(this_sed[1][spp]) for spp in spp_list if - len(this_sed[1][spp]) != 0] - # Truncated spp_list if necessary - spp_list = [spp for spp in spp_list if len(this_sed[1][spp]) != 0] - - result.append((this_sed[0], np.array(omega), np.array(spp_list))) - - return result - - def _get_cols(special_col_names, cols, patch): """ Retrieve values of special_cols from cols string or patch metadata From dbb3fb3165916861820ddb7cc95558265bdde2f9 Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 16 Sep 2014 09:47:17 -0700 Subject: [PATCH 329/343] Fixed SAR bug in issue #77. --- macroeco/empirical/_empirical.py | 44 ++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py index 8dcca39..9d1fae8 100644 --- a/macroeco/empirical/_empirical.py +++ b/macroeco/empirical/_empirical.py @@ -74,7 +74,9 @@ "x:2; y:2; year:split" will perform the analysis separately for each of four subplots of the patch (created by dividing the x and y coordinates each into two equally sized divisions) within each of the three years, - for a total of 12 separate analyses.""" + for a total of 12 separate analyses. Note that if you pass in the x + split you MUST also pass in a y split (even if it is just "y:1") or vice + versa. 
Otherwise, the computed areas will be incorrect.""" division_note = \ """The parameter divisions describes how to successively divide the patch @@ -147,6 +149,7 @@ def __init__(self, metadata_path, subset=''): self.table = self._load_table(metadata_path, self.meta['Description']['datapath']) + self.incremented = False def _load_table(self, metadata_path, data_path): """ @@ -174,8 +177,10 @@ def _load_table(self, metadata_path, data_path): if extension == 'csv': full_table = pd.read_csv(data_path, index_col=False) table = _subset_table(full_table, self.subset) - self.meta = _subset_meta(self.meta, self.subset) + self.meta, _ = _subset_meta(self.meta, self.subset) elif extension in ['db', 'sql']: + + # TODO: deal with incrementing in DB table table = self._get_db_table(data_path, extension) else: raise TypeError('Cannot process file of type %s' % extension) @@ -274,7 +279,7 @@ def _subset_table(full_table, subset): return full_table[valid] -def _subset_meta(full_meta, subset): +def _subset_meta(full_meta, subset, incremented=False): """ Return metadata reflecting all conditions in subset @@ -284,6 +289,8 @@ def _subset_meta(full_meta, subset): Metadata object subset : str String describing subset of data to use for analysis + incremented : bool + If True, the metadata has already been incremented Returns ------- @@ -292,7 +299,7 @@ def _subset_meta(full_meta, subset): """ if not subset: - return full_meta + return full_meta, False meta = {} # Make deepcopy of entire meta (all section dicts in meta dict) for key, val in full_meta.iteritems(): @@ -300,6 +307,7 @@ def _subset_meta(full_meta, subset): conditions = subset.replace(' ','').split(';') + inc = False for condition in conditions: condition_list = re.split('[<>=]', condition) col = condition_list[0] @@ -318,15 +326,23 @@ def _subset_meta(full_meta, subset): elif operator == '>=': meta[col]['min'] = val elif operator == '>': - meta[col]['min'] = str(eval(val) + eval(col_step)) + if incremented: + meta[col]['min'] = val + else: + meta[col]['min'] = str(eval(val) + eval(col_step)) + inc = True elif operator == '<=': meta[col]['max'] = val elif operator == '<': - meta[col]['max'] = str(eval(val) - eval(col_step)) + if incremented: + meta[col]['max'] = val + else: + meta[col]['max'] = str(eval(val) - eval(col_step)) + inc = True else: raise ValueError, "Subset %s not valid" % condition - return meta + return meta, inc @log_start_end @@ -508,7 +524,7 @@ def _sar_ear_inner(patch, cols, splits, divs, y_func): subdivlist = _split_divs(divs) for subdiv in subdivlist: spatial_table = _yield_spatial_table(subpatch, subdiv, spp_col, - count_col, x_col, y_col) + count_col, x_col, y_col) subresulty.append(y_func(spatial_table, all_spp)) subresultx.append(A0 / eval(subdiv.replace(',', '*'))) subresultnspp.append(np.mean(spatial_table['n_spp'])) @@ -995,7 +1011,9 @@ def _yield_subpatches(patch, splits, name='split'): log.info('Analyzing subset %s: %s' % (name, subset)) subpatch = copy.copy(patch) subpatch.table = _subset_table(patch.table, subset) - subpatch.meta = _subset_meta(patch.meta, subset) + subpatch.meta, subpatch.incremented = _subset_meta(patch.meta, + subset, incremented=True) + yield subset, subpatch else: yield '', patch @@ -1054,7 +1072,11 @@ def _patch_area(patch, x_col, y_col): col_step = eval(patch.meta[col]['step']) col_min = eval(patch.meta[col]['min']) col_max = eval(patch.meta[col]['max']) - lengths.append(col_max - col_min + col_step) + + if patch.incremented: + lengths.append(col_max - col_min) + else: + lengths.append(col_max - 
col_min + col_step) return lengths[0] * lengths[1] @@ -1063,7 +1085,9 @@ def _col_starts_ends(patch, col, slices): col_step = eval(patch.meta[col]['step']) col_min = eval(patch.meta[col]['min']) col_max = eval(patch.meta[col]['max']) + edges = np.linspace(col_min-col_step/2, col_max+col_step/2, eval(slices)+1) + starts = edges[:-1] ends = edges[1:] From 812473b8186351502a232431179ca0ac710c578e Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 16 Sep 2014 09:47:41 -0700 Subject: [PATCH 330/343] Updated empirical unit tests. All passing --- macroeco/empirical/test_empirical.py | 65 ++++++++++++++++------------ 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/macroeco/empirical/test_empirical.py b/macroeco/empirical/test_empirical.py index f98868e..646d1c6 100644 --- a/macroeco/empirical/test_empirical.py +++ b/macroeco/empirical/test_empirical.py @@ -62,12 +62,12 @@ class TestSAD(Patches): def test_simple(self): # Falling back on spp_col in metadata, so count 1 for each row sad = emp.sad(self.pat1, None, None) - assert_equal(sad[0][1]['y'], [3,2]) + assert_array_equal(sad[0][1]['y'], [3,2]) def test_simple_with_cols(self): # Specify count and spp_col here sad = emp.sad(self.pat1, self.cols1, None) - assert_equal(sad[0][1]['y'], [4,4]) + assert_array_equal(sad[0][1]['y'], [4,4]) def test_two_way_split(self): # Complete split generates 6 results @@ -111,46 +111,55 @@ class TestSSAD(Patches): def test_no_splits(self): # Just total abundance by species ssad = emp.ssad(self.pat1, self.cols1, None) - assert_equal(ssad[0][1]['y'], [4]) - assert_equal(ssad[1][1]['y'], [4]) + assert_array_equal(ssad[0][1]['y'], [4]) + assert_array_equal(ssad[1][1]['y'], [4]) def test_with_split(self): ssad = emp.ssad(self.pat1, self.cols1, 'x:2') - assert_equal(ssad[0][1]['y'], [4,0]) # spp a - assert_equal(ssad[1][1]['y'], [1,3]) # spp b + assert_array_equal(ssad[0][1]['y'], [4,0]) # spp a + assert_array_equal(ssad[1][1]['y'], [1,3]) # spp b class TestSAR(Patches): def test_no_splits(self): sar = emp.sar(self.pat1, self.cols1, None, '1,1; 2,1; 2,3') - assert_almost_equal(sar[0][1]['x'], + assert_array_almost_equal(sar[0][1]['x'], [1*self.A1, 0.5*self.A1, 1/6*self.A1]) - assert_equal(sar[0][1]['y'], [2, 1.5, (1+2+1+0+0+1)/6.]) + assert_array_equal(sar[0][1]['y'], [2, 1.5, (1+2+1+0+0+1)/6.]) def test_with_split(self): sar = emp.sar(self.pat1, self.cols1, 'year:split', '2,1; 1,3') - assert_almost_equal(sar[0][1]['x'], [0.5*self.A1, 1/3.*self.A1]) - assert_almost_equal(sar[1][1]['x'], [0.5*self.A1, 1/3.*self.A1]) - assert_equal(sar[0][1]['y'], [0.5, 2/3.]) - assert_equal(sar[1][1]['y'], [3/2., 1]) + assert_array_almost_equal(sar[0][1]['x'], [0.5*self.A1, 1/3.*self.A1]) + assert_array_almost_equal(sar[1][1]['x'], [0.5*self.A1, 1/3.*self.A1]) + assert_array_equal(sar[0][1]['y'], [0.5, 2/3.]) + assert_array_equal(sar[1][1]['y'], [3/2., 1]) def test_single_division(self): sar = emp.sar(self.pat1, self.cols1, None, '2,1') - assert_almost_equal(sar[0][1]['x'], [0.5*self.A1]) - assert_equal(sar[0][1]['y'], [1.5]) + assert_array_almost_equal(sar[0][1]['x'], [0.5*self.A1]) + assert_array_equal(sar[0][1]['y'], [1.5]) + + def test_empty_equals_split_subset(self): + sar_empty = emp.sar(self.pat1, self.cols1, "", '1,1') + sar_split = emp.sar(self.pat1, self.cols1, "x:1; y:1", '1,1') + print sar_empty + print sar_split + assert_frame_equal(sar_empty[0][1].sort(axis=1), + sar_split[0][1].sort(axis=1)) + class TestEAR(Patches): def test_no_splits(self): sar = emp.sar(self.pat1, self.cols1, None, '1,1; 2,1; 2,3', 
ear=True) - assert_equal(sar[0][1]['y'], [2, 0.5, 0]) + assert_array_equal(sar[0][1]['y'], [2, 0.5, 0]) def test_with_split(self): sar = emp.sar(self.pat1, self.cols1, 'year:split', '2,1;1,3', ear=True) - assert_equal(sar[0][1]['y'], [0.5, 0]) - assert_equal(sar[1][1]['y'], [0.5, 1/3.]) + assert_array_equal(sar[0][1]['y'], [0.5, 0]) + assert_array_equal(sar[1][1]['y'], [0.5, 1/3.]) class TestCommGrid(Patches): @@ -158,43 +167,43 @@ class TestCommGrid(Patches): def test_no_splits_Sorensen(self): comm = emp.comm_grid(self.pat1, self.cols1, None, '2,1') assert_almost_equal(comm[0][1]['x'], [0.1]) - assert_equal(comm[0][1]['y'], [2./(2+1)]) + assert_array_equal(comm[0][1]['y'], [2./(2+1)]) def test_no_splits_Jaccard(self): comm = emp.comm_grid(self.pat1, self.cols1, None, '2,1', metric='Jaccard') assert_almost_equal(comm[0][1]['x'], [0.1]) - assert_equal(comm[0][1]['y'], [1/2.]) + assert_array_equal(comm[0][1]['y'], [1/2.]) def test_with_split(self): comm = emp.comm_grid(self.pat1, self.cols1, 'year:split', '2,1') - assert_equal(comm[0][1]['y'], [0]) - assert_equal(comm[1][1]['y'], [2/3.]) + assert_array_equal(comm[0][1]['y'], [0]) + assert_array_equal(comm[1][1]['y'], [2/3.]) def test_y_division_even(self): comm = emp.comm_grid(self.pat1, self.cols1, '', '1,3') - assert_equal(comm[0][1]['pair'], ['(0.15 0.1) - (0.15 0.2)', + assert_array_equal(comm[0][1]['pair'], ['(0.15 0.1) - (0.15 0.2)', '(0.15 0.1) - (0.15 0.3)', '(0.15 0.2) - (0.15 0.3)']) - assert_almost_equal(comm[0][1]['x'], [0.1, 0.2, 0.1]) - assert_equal(comm[0][1]['y'], [2/3., 2/3., 1.]) + assert_array_almost_equal(comm[0][1]['x'], [0.1, 0.2, 0.1]) + assert_array_equal(comm[0][1]['y'], [2/3., 2/3., 1.]) def test_x_y_division_uneven_y(self): comm = emp.comm_grid(self.pat1, self.cols1, '', '2,2') print comm - assert_equal(comm[0][1]['pair'], ['(0.1 0.125) - (0.1 0.275)', + assert_array_equal(comm[0][1]['pair'], ['(0.1 0.125) - (0.1 0.275)', '(0.1 0.125) - (0.2 0.125)', '(0.1 0.125) - (0.2 0.275)', '(0.1 0.275) - (0.2 0.125)', '(0.1 0.275) - (0.2 0.275)', '(0.2 0.125) - (0.2 0.275)']) - assert_almost_equal(comm[0][1]['x'], [0.15, 0.1, 0.180278, 0.180278, + assert_array_almost_equal(comm[0][1]['x'], [0.15, 0.1, 0.180278, 0.180278, 0.1, 0.15], 6) - assert_equal(comm[0][1]['y'], [2/3., 0, 0, 0, 2/3., 0]) + assert_array_equal(comm[0][1]['y'], [2/3., 0, 0, 0, 2/3., 0]) def test_x_y_division_uneven_y_jaccard(self): comm = emp.comm_grid(self.pat1, self.cols1, '', '2,2',metric='Jaccard') - assert_equal(comm[0][1]['y'], [1/2., 0, 0, 0, 1/2., 0]) + assert_array_equal(comm[0][1]['y'], [1/2., 0, 0, 0, 1/2., 0]) class TestORing(Patches): # TODO: Main may fail with error if dataframe has no records when trying to From f96c6759f5b160361ac6c1238dff7b461e49443b Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Tue, 30 Sep 2014 14:30:15 -0700 Subject: [PATCH 331/343] Changed log to np.log in logser --- macroeco/models/_distributions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index f6c8e2b..1cff55b 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -911,7 +911,7 @@ def _cdf(self, x, p): return stats.logser.cdf(x, p) def _stats(self, p): - r = log(1 - p) + r = np.log(1 - p) mu = p / (p - 1.0) / r mu2p = -p / r / (p - 1.0)**2 var = mu2p - mu*mu From 739a03a0e175cc3b2ca8e75fc588f6b28c28b11b Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 29 Oct 2014 23:44:53 -0700 Subject: [PATCH 332/343] Guess for sigma can't be zero 
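The guard below protects against data whose log transform has no spread, which would hand the optimizer a degenerate starting sigma; a minimal illustration (values arbitrary)::

    import numpy as np

    data = np.array([3, 3, 3])          # every observation identical
    sig0 = np.std(np.log(data + 1))     # 0.0, unusable as a starting value
    if sig0 == 0:
        sig0 = 1e-5                     # nudge away from zero, as in the fix below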
--- macroeco/models/_distributions.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index f6c8e2b..bfd539a 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -1121,8 +1121,12 @@ def translate_args(self, mean, sigma): @inherit_docstring_from(rv_discrete_meco) def fit_mle(self, data): - mu0 = np.mean(np.log(data)) - sig0 = np.std(np.log(data)) + mu0 = np.mean(np.log(np.array(data) + 1)) + sig0 = np.std(np.log(np.array(data) + 1)) + + if sig0 == 0: + + sig0 = 1e-5 # can't be zero def mle(params): return -np.sum(self.logpmf(data, params[0], params[1])) From 2a6ff99842add993d6d400082a9b58f91944982b Mon Sep 17 00:00:00 2001 From: Mark Wilber Date: Wed, 29 Oct 2014 23:46:43 -0700 Subject: [PATCH 333/343] Added non-zero restriction for plnorm_ztrunc --- macroeco/models/_distributions.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/macroeco/models/_distributions.py b/macroeco/models/_distributions.py index bfd539a..df98807 100644 --- a/macroeco/models/_distributions.py +++ b/macroeco/models/_distributions.py @@ -1248,6 +1248,10 @@ def fit_mle(self, data): mu0 = np.mean(np.log(data)) sig0 = np.std(np.log(data)) + if sig0 == 0: + + sig0 = 1e-5 # can't be zero + def mle(params): return -np.sum(np.log(self._pmf(data, params[0], params[1]))) From 86a0abf00b45da21ca0543a9f277fc0056b4d112 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 13 Sep 2014 20:31:15 -0700 Subject: [PATCH 334/343] Remove gen_sar This is much more complicated than in previous versions of macroeco where all distributions were set up to take S and N as the first two arguments. Without that consistency, it's not trivial to implement gen_sar - removing for now. --- macroeco/models/_curves.py | 92 -------------------------------------- 1 file changed, 92 deletions(-) diff --git a/macroeco/models/_curves.py b/macroeco/models/_curves.py index 6724f68..728af76 100644 --- a/macroeco/models/_curves.py +++ b/macroeco/models/_curves.py @@ -142,98 +142,6 @@ def _vals(self, x, c, z): power_law.__doc__ = power_law.__doc__.format(_doc_methods, _doc_parameters) -class generic_sar_gen(curve): - """ - A generic SAR based on a combination of an SAD and SSAD - - .. math:: - - S = c x^z - - The generic SAR may be used either for downscaling, when values of A are - less than A0, or upscaling, when values of A are greater than A0. - Downscaling creates the traditional SAR known to ecologists, while - upscaling is useful for estimating large-scale species richness from small- - scale plot data. - - A keyword argument iterative is available for the generic SAR (default is - False). If True, the SAR is calculated at successive A values, with the - result at each value of A used as the base values of S and N for the - subsequent calculation. The generic iterative SAR form is a generalization - of the universal SAR proposed by Harte et al [#]_. - - Methods - ------- - vals(x, S0, N0, SAD_model, SSAD_model) - Calculate SAR given starting values and two models. See notes. - - Parameters - ---------- - x : iterable - Areas at which to calculate SAR (first element is A0) - S0 : float - Species richness at A0 - SAD_model : object - Frozen distribution from macroeco.models - SSAD_model : object - Frozen distribution from macroeco.models - tol : float - Stop calculation when 1 - tol of pdf of SAD has been evaluated. 
Since - the SSAD is generally decreasing at high SAD values, this is nearly - always an upper limit on the error in the SAR. - - Notes - ----- - The SAR results here are an underestimate, determined by the value of tol, - as the summation is carried out only the cdf of the SAD reaches 1-tol. - - Notes - ----- - The frozen distributions SAD_model and SSAD_model should generally be - frozen with parameters applicable to the base scale at which S0 is - measured. - - References - ---------- - .. [#] - Harte, J., Smith, A. B., & Storch, D. (2009). Biodiversity scales from - plots to biomes with a universal species-area curve. Ecology Letters, - 12(8), 789-797. - - """ - - def _vals(self, x, S0, SAD_model, SSAD_model): - # x is area, y is S - - A0 = x[0] - y = [S0] - - for A in x[1:]: - a = A/A0 - - if a == 1: - S1 = S0 - elif a < 1: - S1 = self._downscale_step(a, SAD_model, SSAD_model) - else: - S1 = self._upscale_step(a, SAD_model, SSAD_model) - - y.append(S1) - - return np.array(y) - - def _downscale_step(self, a, SAD_model, SSAD_model): - pass - - def _upscale_step(self, a, SAD_model, SSAD_model): - pass - - def fit_lsq(self, patch, cols, SAD_model_name, SSAD_model_name): - raise NotImplementedError, ("fit method not available for generic sar") - -generic_sar = generic_sar_gen(name='generic_sar', parameters='') - - class mete_sar_gen(curve): """ A SAR/EAR predicted by the Maximum Entropy Theory of Ecology From bf5e2683b76d89f9f25b2639cfe268da78228c69 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Sat, 13 Sep 2014 20:31:59 -0700 Subject: [PATCH 335/343] Finish mete_sar and initial unit tests --- macroeco/models/__init__.py | 6 +-- macroeco/models/_curves.py | 89 ++++++++++++++++++---------------- macroeco/models/test_curves.py | 59 ++++++++++++++++++++++ 3 files changed, 109 insertions(+), 45 deletions(-) create mode 100644 macroeco/models/test_curves.py diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py index d019cd0..55d457d 100644 --- a/macroeco/models/__init__.py +++ b/macroeco/models/__init__.py @@ -55,9 +55,8 @@ power_law mete_sar - mete_iterative_sar + mete_sar_iterative mete_ear - mete_iterative_ear """ @@ -67,5 +66,4 @@ dgamma) from ._curves import (power_law, - mete_sar, mete_iterative_sar, - mete_ear, mete_iterative_ear) + mete_sar, mete_sar_iterative, mete_ear) diff --git a/macroeco/models/_curves.py b/macroeco/models/_curves.py index 728af76..b40ff67 100644 --- a/macroeco/models/_curves.py +++ b/macroeco/models/_curves.py @@ -171,13 +171,14 @@ class mete_sar_gen(curve): Species richness at A0 N0 : float Community abundance at A0 - SAD_model : object - Frozen distribution from macroeco.models - SSAD_model : object - Frozen distribution from macroeco.models - iterative : bool + iterative : bool (opt) If true, SAR calculation for subplots are based on variables for next larger area instead of initial plot variables. Default False. + array_size : int (opt) + Maximum size of array for SAD pmf's. If N0 is greater than this value, + calculation proceeds using array_size increments until N0 is reached. + approx : bool (opt) + Use non-truncated logseries and geometric distributions. Default False. References ---------- @@ -192,18 +193,18 @@ def __init__(self, name=None, parameters=None, iterative=False, ear=False): """ Provides extra iterative attribute. 
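For orientation, the downscaling form exercised in the accompanying test_curves.py can be called as follows (a sketch; area and parameter values are arbitrary)::

    from macroeco.models import mete_sar

    areas = [100, 50, 10]                            # first element is A0
    S = mete_sar.vals(areas, 100, 1e6, approx=True)  # S0=100, N0=1e6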
""" + if iterative and ear: + raise ValueError, "Iterative EAR calculation is not possible" + self.name = name self.parameters = parameters self.n_parameters = len(parameters.split(',')) self.iterative = iterative self.ear = ear - def _vals(self, x, S0, N0, iterative=False): + def _vals(self, x, S0, N0, array_size=1e6, approx=False): # x is area, y is S - if iterative: # Override attribute set by init if passed here - self.iterative = iterative - A0 = x[0] y = [S0] @@ -213,9 +214,9 @@ def _vals(self, x, S0, N0, iterative=False): if a == 1: S1, N1 = S0, N0 elif a < 1: - S1, N1 = self._downscale_step(a, S0, N0) + S1, N1 = self._downscale_step(a, S0, N0, array_size, approx) else: - S1, N1 = self._upscale_step(a, S0, N0) + S1, N1 = self._upscale_step(a, S0, N0, array_size, approx) y.append(S1) @@ -224,37 +225,47 @@ def _vals(self, x, S0, N0, iterative=False): return np.array(y) - def _downscale_step(self, a, S0, N0, array_size=1e6): - + def _downscale_step(self, a, S0, N0, array_size, approx): lower = 1 upper = array_size + 1 S = 0 - while lower < N0: + if S0 < 1 or np.isnan(S0): # Give up if S0 too small + return np.nan, N0*a - if S0 < 1 or np.isnan(S0): # Give up and continue if S0 too small - S = np.nan - lower += array_size - upper += array_size - continue + while lower < N0: if upper > N0: - upper = N0 + upper = N0 + 1 n0 = np.arange(lower, upper) - sad_p, _ = dist.logser_uptrunc.translate_args(N0/S0, N0) - sad = dist.logser_uptrunc.pmf(n0, sad_p, N0) + + if approx: + sad_p = dist.logser.translate_args(N0/S0) + sad = dist.logser.pmf(n0, sad_p) + else: + sad_p, _ = dist.logser_uptrunc.translate_args(N0/S0, N0) + sad = dist.logser_uptrunc.pmf(n0, sad_p, N0) if np.isclose(a, 0.5): ssad_p = 1 / (n0 + 1) else: - ssad_p, _ = dist.geom_uptrunc.translate_args(a*n0, N0) + if approx: + ssad_p = dist.geom.translate_args(a*n0) + else: + ssad_p, _ = dist.geom_uptrunc.translate_args(a*n0, N0) if self.ear: - ssad = dist.geom_uptrunc.pmf(n0, ssad_p, N0) + if approx: + ssad = dist.geom.pmf(n0, ssad_p) + else: + ssad = dist.geom_uptrunc.pmf(n0, ssad_p, N0) S += S0 * np.sum(ssad * sad) else: - ssad = dist.geom_uptrunc.pmf(0, ssad_p, N0) + if approx: + ssad = dist.geom.pmf(0, ssad_p) + else: + ssad = dist.geom_uptrunc.pmf(0, ssad_p, N0) S += S0 * np.sum((1 - ssad) * sad) lower += array_size @@ -262,8 +273,14 @@ def _downscale_step(self, a, S0, N0, array_size=1e6): return S, N0*a - def _upscale_step(self, a, S0, N0): - raise NotImplementedError, "Upscaling not implemented yet" + def _upscale_step(self, a, S0, N0, array_size, approx): + + N1 = N0*a + + def eq(S1, N1, a, S0, array_size, approx): + return S0-self._downscale_step(1/a, S1, N1, array_size, approx)[0] + + return optimize.brentq(eq,S0,S0*a,args=(N1,a,S0,array_size,approx)), N1 def fit_lsq(self, df): """ @@ -276,28 +293,18 @@ def fit_lsq(self, df): Notes ----- - Method does not use least squares to fit, but rather parameterizes SAD - and SSAD mdoels based on SAR output. Name ``fit_lsq`` is retained for + Simply returns S0 and N0 from empirical SAR output, which are two fixed + parameters of METE SAR and EAR. The first row of the empirical + dataframe corresponds to area A0. Name ``fit_lsq`` is retained for consistency with other curves. - The first row of the empirical dataframe must be for an area A = A0. 
- """ # Just return S0 and N0 at largest scale, which is first row of df return df['n_spp'].values[0], df['n_individs'].values[0] mete_sar = mete_sar_gen(name='mete_sar', parameters='S0,N0') -mete_iterative_sar = mete_sar_gen(name='mete_iterative_sar', +mete_sar_iterative = mete_sar_gen(name='mete_iterative_sar', parameters='S0,N0', iterative=True) mete_ear = mete_sar_gen(name='mete_ear', parameters='S0,N0', ear=True) -mete_iterative_ear = mete_sar_gen(name='mete_iterative_sar', - parameters='S0,N0', iterative=True, ear=True) - - - - - - - diff --git a/macroeco/models/test_curves.py b/macroeco/models/test_curves.py new file mode 100644 index 0000000..01e2c0c --- /dev/null +++ b/macroeco/models/test_curves.py @@ -0,0 +1,59 @@ +from __future__ import division + +from numpy.testing import (TestCase, assert_equal, assert_array_equal, + assert_almost_equal, assert_array_almost_equal, + assert_allclose, assert_, assert_raises) + +import numpy as np +from decimal import Decimal +from macroeco.models import * +import scipy as sp +import scipy.stats as stats + + +class METE_SAR(TestCase): + + def test_reversible(self): + S0, N0 = 100, 1e6 + As = np.array([100,50,10]) + Ns = N0 * As / As[0] + + Ss = mete_sar.vals(As, 100, 1e6, approx=True) + + # Start with each smaller base and go directly up to A0 + for A, S, N in zip(As[1:], Ss[1:], Ns[1:]): + assert_almost_equal(S0, + mete_sar.vals([A, As[0]], S, N, approx=True)[1]) + + def test_vals_down(self): + pass + + def test_vals_up(self): + pass + +class METE_iterative_SAR(TestCase): + + def test_reversible(self): + S0, N0 = 100, 1e6 + As = np.array([100,50,10]) + Ns = N0 * As / As[0] + + Ss = mete_sar_iterative.vals(As, 100, 1e6, approx=True) + + assert_array_almost_equal(Ss[::-1], + mete_sar_iterative.vals(As[::-1], Ss[-1], Ns[-1], approx=True)) + + def test_vals_down(self): + pass + + def test_vals_up(self): + # ACARI results from Bassett upscaling paper, see SI + # Note that different approximations are used here and in that analysis + S0, N0 = 86.6, 2015 + As = [0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56] + + Ss = mete_sar_iterative.vals(As, S0, N0, approx=True) + + assert_array_almost_equal(Ss, + [86.6, 106.0327113, 127.1223631, 149.7292838, + 173.7360065, 199.0452844, 225.5766732]) From 4c3a88e77549423a68796ec3c891a0c3417e2b54 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 30 Oct 2014 13:51:13 -0700 Subject: [PATCH 336/343] Ignore egg-info files from local development installs --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index bdd4820..a6d5175 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ _private/* *.pyc *.DS_Store *.pdf +*egg-info* + From d5e0c47cbf6a3e3712e62cd347281e89080a3cc2 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 30 Oct 2014 13:51:35 -0700 Subject: [PATCH 337/343] Add expanded output option to empirical o-ring --- macroeco/empirical/_empirical.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/macroeco/empirical/_empirical.py b/macroeco/empirical/_empirical.py index 9d1fae8..b73d836 100644 --- a/macroeco/empirical/_empirical.py +++ b/macroeco/empirical/_empirical.py @@ -680,7 +680,7 @@ def _yield_spatial_table(patch, div, spp_col, count_col, x_col, y_col): @log_start_end @doc_sub(metric_params, metric_return, cols_note, splits_note) -def o_ring(patch, cols, splits, spp, bin_edges, density=True): +def o_ring(patch, cols, splits, spp, bin_edges, density=True, full=False): """ Calculates univariate O-ring for a 
species @@ -694,6 +694,9 @@ def o_ring(patch, cols, splits, spp, bin_edges, density=True): density : bool If True, return densities (counts divided by area of torus defined by bin edges) instead of counts. Default True. + full : bool + If True, return a separate column giving density at distance x for + every individual, rather than mean density. Default False. Returns ------- @@ -763,8 +766,13 @@ def o_ring(patch, cols, splits, spp, bin_edges, density=True): counts = list(spp_table[count_col]) # Arrays to hold summed areas and distance histograms for all points - areas = np.zeros(len(radii)) - hists = np.zeros(len(radii)) + + if full: + hists = [] # Vectors of len(radii) appended for each point + areas = [] + else: + hists = np.zeros(len(radii)) + areas = np.zeros(len(radii)) # Go through each point and associated count for i, (point, count) in enumerate(zip(points, counts)): @@ -803,15 +811,24 @@ def o_ring(patch, cols, splits, spp, bin_edges, density=True): circ.boundary.length) # Add hist and corrected area for this point to running totals - hists += hist - areas += torus_areas * corr_factor * count + if full: + hists.append(hist) + areas.append(torus_areas * corr_factor * count) + else: + hists += hist + areas += torus_areas * corr_factor * count # If density, divide summed torus counts by summed areas if density: - hists = hists / areas + hists = np.array(hists) / np.array(areas) # Append subset result - subresult = pd.DataFrame({'x': radii, 'y': hists}) + subresult = pd.DataFrame({'x': radii}) + if full: + for i in range(len(hists)): + subresult[i] = hists[i] + else: + subresult['y'] = hists result_list.append((substring, subresult)) # Return all results From 24f17d112bb5a1dd4b02bb4d7999c9f064bb2f40 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 30 Oct 2014 13:52:26 -0700 Subject: [PATCH 338/343] Avoid figure parameter pollution in main.py, closes #84 --- macroeco/main/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/macroeco/main/main.py b/macroeco/main/main.py index 93e81b3..7ce2843 100644 --- a/macroeco/main/main.py +++ b/macroeco/main/main.py @@ -10,7 +10,6 @@ import copy log = log.name('meco') - import numpy as np import pandas as pd @@ -23,8 +22,6 @@ from .. import compare as comp from .. 
import misc -mpl.rcParams.update(misc.rcparams.ggplot_rc) - def main(param_path='parameters.txt'): """ @@ -383,6 +380,9 @@ def _save_results(options, module, core_results, fit_results): log.info("Saving all results") + # Use custom plot format + mpl.rcParams.update(misc.rcparams.ggplot_rc) + # Make run directory os.makedirs(options['run_dir']) From 1bd257822acb684a48c0bea3786444dafab938c9 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 30 Oct 2014 13:53:13 -0700 Subject: [PATCH 339/343] Add faster double-based METE upscaling function based on macroecotools --- macroeco/models/__init__.py | 3 +- macroeco/models/_curves.py | 79 ++++++++++++++++++++++++++++++++++++- 2 files changed, 80 insertions(+), 2 deletions(-) diff --git a/macroeco/models/__init__.py b/macroeco/models/__init__.py index 55d457d..4eb9c59 100644 --- a/macroeco/models/__init__.py +++ b/macroeco/models/__init__.py @@ -66,4 +66,5 @@ dgamma) from ._curves import (power_law, - mete_sar, mete_sar_iterative, mete_ear) + mete_sar, mete_sar_iterative, mete_upscale_iterative_alt, + mete_ear) diff --git a/macroeco/models/_curves.py b/macroeco/models/_curves.py index b40ff67..e109f51 100644 --- a/macroeco/models/_curves.py +++ b/macroeco/models/_curves.py @@ -3,6 +3,7 @@ import numpy as np import pandas as pd from scipy import optimize +from mpmath import lerchphi from ..misc import inherit_docstring_from import _distributions as dist @@ -202,7 +203,7 @@ def __init__(self, name=None, parameters=None, iterative=False, ear=False): self.iterative = iterative self.ear = ear - def _vals(self, x, S0, N0, array_size=1e6, approx=False): + def _vals(self, x, S0, N0, array_size=1e6, approx=False, alt_up=False): # x is area, y is S A0 = x[0] @@ -229,11 +230,13 @@ def _downscale_step(self, a, S0, N0, array_size, approx): lower = 1 upper = array_size + 1 S = 0 + print S0,N0 if S0 < 1 or np.isnan(S0): # Give up if S0 too small return np.nan, N0*a while lower < N0: + print lower if upper > N0: upper = N0 + 1 @@ -276,6 +279,7 @@ def _downscale_step(self, a, S0, N0, array_size, approx): def _upscale_step(self, a, S0, N0, array_size, approx): N1 = N0*a + print a def eq(S1, N1, a, S0, array_size, approx): return S0-self._downscale_step(1/a, S1, N1, array_size, approx)[0] @@ -308,3 +312,76 @@ def fit_lsq(self, df): parameters='S0,N0', iterative=True) mete_ear = mete_sar_gen(name='mete_ear', parameters='S0,N0', ear=True) + +def mete_upscale_iterative_alt(S, N, doublings): + """ + This function is used to upscale from the anchor area. + + Parameters + ---------- + S : int or float + Number of species at anchor scale + N : int or float + Number of individuals at anchor scale + doublings : int + Number of doublings of A. Result vector will be length doublings + 1. 
+ + Returns + ------- + result : ndarray + 1D array of number of species at each doubling + + """ + + # Arrays to store N and S at all doublings + n_arr = np.empty(doublings+1) + s_arr = np.empty(doublings+1) + + # Loop through all scales + for i in xrange(doublings+1): + + # If this is first step (doubling 0), N and S are initial values + if i == 0: + n_arr[i] = N + s_arr[i] = S + + # If not first step + else: + + # Get previous S + SA = s_arr[i-1] + + # N is double previous N + n_arr[i] = 2 * n_arr[i-1] + N2A = n_arr[i] + + # Eq 8 from Harte 2009, setup to return S2A given input of x + # x is exp(-lam_phi, 2A) + def S2A_calc(x, SA, N2A): + return ((SA + + N2A * + (1-x)/(x-x**(N2A+1)) * + (1 - (x**N2A)/(N2A+1))) / + x**-1) + + # Eq 9 from Harte 2009, setup to equal to zero, used to solve x + # Note that two summations are replaced by known formulas for sum + # of geometric and logarithmic series. + # Note "offset" of 1e-23, which is needed because f(a) and f(b) do + # not have the same sign in solver otherwise. This introduces no + # more than a 1e-23 error in the calculation of x, which should not + # cause a significant problem. + def x_calc(x, SA, N2A): + return (S2A_calc(x,SA,N2A) / + N2A * + x*(x**N2A-1)/(x-1) - + (x**N2A * (-lerchphi(x,1,N2A+1))-np.log(1-x)) ) - 1e-23 + + # Solve for x + x = (optimize.brentq(x_calc, 1e-24, 1-1e-16, args=(SA,N2A), + xtol=1e-16, maxiter=1000, disp=True) + 1e-23) + + # Given x, calculate S2A + s_arr[i] = S2A_calc(x,SA,N2A) + + return s_arr From 2f7291422bcf7abc074fda50b6eaab1f10861033 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 30 Oct 2014 19:41:02 -0700 Subject: [PATCH 340/343] Updates to README --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 51fc6af..333c18c 100644 --- a/README.rst +++ b/README.rst @@ -5,4 +5,4 @@ Macroeco is a Python package that provides a comprehensive set of functions for Extensive documentation for macroeco, including detailed installation instructions, tutorials, and a reference guide, is available at http://macroeco.org. The most recent stable version of the macroeco package can be installed from PyPI (``pip install macroeco``). For users who do not program in Python, a standalone application called Macroeco Desktop, which provides most of the functionality of macroeco through a simple interface that requires no programming, is also available. -The current version of macroeco was developed at the University of California, Berkeley by Justin Kitzes and Mark Wilber and is maintained by Justin Kitzes. Other contributors include Chloe Lewis and Ethan White. The development of macroeco has been supported by the National Science Foundation, the Gordon and Betty Moore Foundation, and the Berkeley Institute for Global Change Biology. +The current version of macroeco is developed and maintained by Justin Kitzes (UC Berkeley) and Mark Wilber (UC Santa Barbara). Other contributors include Chloe Lewis and Ethan White. The development of macroeco has been supported by the National Science Foundation, the Gordon and Betty Moore Foundation, and the Berkeley Institute for Data Science. 
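For reference, the double-based upscaling helper added in PATCH 339 is exported from ``macroeco.models`` and can be called directly once the package is installed. The snippet below is a minimal usage sketch; the anchor-scale values (S = 50, N = 5000) and the three doublings are illustrative assumptions, not results taken from these patches::

    from macroeco.models import mete_upscale_iterative_alt

    # Upscale from the anchor scale by three successive doublings of area.
    # The result has length doublings + 1: element 0 is the anchor-scale
    # richness, element i is the predicted richness after i doublings.
    s_arr = mete_upscale_iterative_alt(S=50, N=5000, doublings=3)
    print(s_arr)
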
From f5361f7c12f10cd56a853ef66648584d5cfc0e40 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 30 Oct 2014 19:43:37 -0700 Subject: [PATCH 341/343] Add mpmath to doc requirements --- doc/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/requirements.txt b/doc/requirements.txt index 1ac4629..8e4e869 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -2,6 +2,7 @@ scipy numpy matplotlib pandas +mpmath configparser decorator twiggy From 416d604b2f7c50c1ee0892ac268281e922791584 Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 30 Oct 2014 19:43:49 -0700 Subject: [PATCH 342/343] Add link to release page --- doc/tutorial_getting_started.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/tutorial_getting_started.rst b/doc/tutorial_getting_started.rst index 7d91c28..5e5bdeb 100644 --- a/doc/tutorial_getting_started.rst +++ b/doc/tutorial_getting_started.rst @@ -18,11 +18,11 @@ Installation For users with an existing scientific Python environment, the latest stable version of both ``macroeco`` and the MacroecoDesktop interface can be installed with ``pip install macroeco``. Several package dependencies may also be installed by this command. The latest development version of Macroeco can be found in the "develop" branch of the `Macroeco GitHub repo `_. -Mac OS X users who wish only to use MacroecoDesktop can instead download the MacroecoDesktop application from this link. After unzipping, drag the MacroecoDesktop application into the Applications folder. +Mac OS X users who wish only to use MacroecoDesktop can instead download the MacroecoDesktop application from `this link `_. After unzipping, drag the MacroecoDesktop application into the Applications folder. Windows and Linux users who wish to use MacroecoDesktop will need to set up a scientific Python environment. The developers recommend the free `Continuum Anaconda `_ scientific Python installation for new users. After downloading and installing Anaconda, run the command ``pip install macroeco`` from a Terminal window. -The remainder of this tutorial uses demo data from a vegetation census in Anza-Borrego Desert State Park in southern California. This demo data can be downloaded at this link. The file ANBO.csv contains the census data and the file ANBO.txt contains metadata describing the data table. This data may be freely shared and used for analysis so long as credit is given to the authors. +The remainder of this tutorial uses demo data from a vegetation census in Anza-Borrego Desert State Park in southern California. This demo data can be downloaded at `this link `_. The file ANBO.csv contains the census data and the file ANBO.txt contains metadata describing the data table. This data may be freely shared and used for analysis so long as credit is given to the authors. .. 
_first-steps-macroeco: From beeec88a2d0d6e80855cde418eb85fc5ad1920cc Mon Sep 17 00:00:00 2001 From: Justin Kitzes Date: Thu, 30 Oct 2014 19:48:33 -0700 Subject: [PATCH 343/343] Correct sar curve names in docs --- doc/tutorial_macroeco_desktop.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/tutorial_macroeco_desktop.rst b/doc/tutorial_macroeco_desktop.rst index 0a680e7..2540916 100644 --- a/doc/tutorial_macroeco_desktop.rst +++ b/doc/tutorial_macroeco_desktop.rst @@ -113,9 +113,8 @@ The third portion of this run begins with the input parameter ``models``, which power_law mete_sar - mete_iterative_sar + mete_sar_iterative mete_ear - mete_iterative_ear If the metric is a probability distribution, the following models may be used (note that some are discrete and some continuous).
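
The corrected names above match the curve objects exercised in ``test_curves.py`` earlier in this series. A minimal sketch of calling them from Python, reusing the demo values from those tests (the areas, S0, and N0 are illustrative; ``approx=True`` selects the non-truncated logseries and geometric approximations)::

    import numpy as np
    from macroeco.models import mete_sar, mete_sar_iterative

    areas = np.array([100, 50, 10])  # first element is the anchor area A0
    S0, N0 = 100, 1e6                # richness and abundance at A0

    # Direct METE SAR: each subplot area is downscaled from the anchor scale.
    S_direct = mete_sar.vals(areas, S0, N0, approx=True)

    # Iterative form: each area is downscaled from the next larger area.
    S_iter = mete_sar_iterative.vals(areas, S0, N0, approx=True)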