glmm.bib

% Roulin and Bersier (2007), Animal Behaviour 74(4): begging intensity of barn owl nestlings.
@article{roulin_nestling_2007,
  author   = {Roulin, Alexandre and Bersier, {Louis-Felix}},
  title    = {Nestling barn owls beg more intensely in the presence of their mother than in the presence of their father},
  journal  = {Animal Behaviour},
  year     = {2007},
  month    = oct,
  volume   = {74},
  number   = {4},
  pages    = {1099--1106},
  issn     = {0003-3472},
  doi      = {10.1016/j.anbehav.2007.01.027},
  url      = {http://www.sciencedirect.com/science/article/B6W9W-4PK8B6H-8/2/e43cfbaad4dc0bb2207adfc54a460c89},
  keywords = {barn owl, begging, parental care, parent-offspring conflict, sexual conflict, sibling negotiation, Tyto alba},
  abstract = {Nestling begging behaviour may be an honest signal of need used by parents to adjust optimally both feeding rate and within-brood food allocation. Although several studies showed that mothers and fathers can be differentially responsive to nestling begging behaviour with one parent showing a stronger tendency to feed the offspring that beg the most, little information is yet available on whether offspring beg for food at different intensities from the mother than father. In the present study, we investigated in nestling barn owls whether the intensity of vocal begging behaviour in the presence of the mother and in the presence of the father is different. A difference is expected because reproductive tasks are divided between the sexes with fathers bringing more food items to the nest than mothers. The results show that although mothers transfer their prey item to one of the offspring more rapidly than fathers once in their nestbox, nestlings begged more intensely in the presence of their mother than in the presence of their father. To our knowledge, this is the first empirical evidence that offspring vocalize to different levels in the presence of their mother than in the presence of their father.}
}

% Zuur et al. (2009), Springer book on mixed effects models in ecology.
@book{zuur_mixed_2009,
  author    = {Zuur, Alain F. and Ieno, Elena N. and Walker, Neil J. and Saveliev, Anatoly A. and Smith, Graham M.},
  title     = {Mixed Effects Models and Extensions in Ecology with {R}},
  publisher = {Springer},
  year      = {2009},
  month     = mar,
  isbn      = {0387874577}
}

% Vaida and Blanchard (2005), Biometrika 92(2): conditional AIC for mixed-effects models.
@article{vaida_conditional_2005,
  author   = {Vaida, Florin and Blanchard, Suzette},
  title    = {Conditional {Akaike} information for mixed-effects models},
  journal  = {Biometrika},
  year     = {2005},
  month    = jun,
  volume   = {92},
  number   = {2},
  pages    = {351--370},
  doi      = {10.1093/biomet/92.2.351},
  url      = {http://biomet.oxfordjournals.org/cgi/content/abstract/92/2/351},
  abstract = {This paper focuses on the Akaike information criterion, {AIC,} for linear mixed-effects models in the analysis of clustered data. We make the distinction between questions regarding the population and questions regarding the particular clusters in the data. We show that the {AIC} in current use is not appropriate for the focus on clusters, and we propose instead the conditional Akaike information and its corresponding criterion, the conditional {AIC,} {cAIC.} The penalty term in {cAIC} is related to the effective degrees of freedom {rho} for a linear mixed model proposed by Hodges \& Sargent (2001); {rho} reflects an intermediate level of complexity between a fixed-effects model with no cluster effect and a corresponding model with fixed cluster effects. The {cAIC} is defined for both maximum likelihood and residual maximum likelihood estimation. A pharmacokinetics data application is used to illuminate the distinction between the two inference settings, and to illustrate the use of the conditional {AIC} in model selection. }
}

% Breslow (2004), chapter in the second Seattle symposium proceedings (Springer).
@incollection{breslow_whither_2004,
  author    = {Breslow, N. E.},
  title     = {Whither {PQL?}},
  booktitle = {Proceedings of the second {Seattle} symposium in biostatistics: Analysis of correlated data},
  editor    = {Lin, Danyu Y. and Heagerty, P. J.},
  publisher = {Springer},
  year      = {2004},
  pages     = {1--22},
  isbn      = {0387208623}
}


% Molenberghs and Verbeke (2007), The American Statistician 61(1).
@article{molenberghs_likelihood_2007,
  author  = {Molenberghs, Geert and Verbeke, Geert},
  title   = {Likelihood Ratio, Score, and {Wald} Tests in a Constrained Parameter Space},
  journal = {The American Statistician},
  year    = {2007},
  volume  = {61},
  number  = {1},
  pages   = {22--27},
  doi     = {10.1198/000313007X171322}
}


% Pinheiro and Bates (2000), Springer book on mixed-effects models.
% The accent is written as a BibTeX special character so sorting and labels
% treat it as a single letter under classic (8-bit) BibTeX.
@book{pinheiro_mixed-effects_2000,
  author    = {Pinheiro, Jos{\'e} C. and Bates, Douglas M.},
  title     = {Mixed-effects models in {S} and {S-PLUS}},
  publisher = {Springer},
  address   = {New York},
  year      = {2000},
  isbn      = {0-387-98957-9}
}


% Venables and Ripley (2002), Springer book, fourth edition.
% The edition is an ordinal word (classic styles append "edition" themselves);
% {S} is braced so sentence-casing styles keep the language name capitalised.
@book{venables_modern_2002,
  author    = {Venables, W. and Ripley, Brian D.},
  title     = {Modern Applied Statistics with {S}},
  edition   = {Fourth},
  publisher = {Springer},
  address   = {New York},
  year      = {2002}
}


% Littell et al. (2006), SAS Publishing book on mixed models.
% The edition lives in its own field so the style formats it, not the title.
@book{littell_sas_2006,
  author    = {Littell, Ramon C. and Milliken, George A. and Stroup, Walter W. and Wolfinger, Russell D. and Schabenberger, Oliver},
  title     = {{SAS} for Mixed Models},
  edition   = {Second},
  publisher = {{SAS} Publishing},
  year      = {2006},
  isbn      = {1590475003}
}

% Fears, Benichou and Gail (1996), The American Statistician 50(3).
% The abstract's formulas are plain LaTeX math (the export had escaped every
% backslash and left raw Unicode thetas, which classic BibTeX cannot carry).
@article{fears_reminder_1996,
  author   = {Fears, Thomas R. and Benichou, Jacques and Gail, Mitchell H.},
  title    = {A Reminder of the Fallibility of the {Wald} Statistic},
  journal  = {The American Statistician},
  year     = {1996},
  month    = aug,
  volume   = {50},
  number   = {3},
  pages    = {226--227},
  issn     = {0003-1305},
  doi      = {10.2307/2684659},
  url      = {http://www.jstor.org/stable/2684659},
  abstract = {Computer programs often produce a parameter estimate $\hat{\theta}$ and estimated variance $\widehat{var}(\hat{\theta})$. Thus it is easy to compute a Wald statistic $(\hat{\theta} - \theta_0)\{\widehat{var}(\hat{\theta})\}^{-1/2}$ to test the null hypothesis $\theta = \theta_0$. Hauck and Donner and Vaeth have identified situations in which the Wald statistic has poor power. We consider another example that is not in the classes discussed by those authors. We present data from a balanced one-way random effects analysis of variance {(ANOVA)} that illustrate the poor power of the Wald statistic compared to the usual F test. In this example the parameter of interest is the variance of the random effect. The power of the Wald test depends on the parameterization used, however, and a whole family of Wald statistics with p values ranging from 0 to 1 can be generated with power transformations of the random effect parameter.}
}


% Elston et al. (2001), Parasitology 122(5): tick counts on red grouse chicks.
@article{elston_analysis_2001,
  author  = {Elston, D. A. and Moss, R. and Boulinier, T. and Arrowsmith, C. and Lambin, X.},
  title   = {Analysis of aggregation, a worked example: numbers of ticks on red grouse chicks},
  journal = {Parasitology},
  year    = {2001},
  volume  = {122},
  number  = {5},
  pages   = {563--569}
}

% Schabenberger (2007), SAS Global Forum paper 177-2007 on PROC GLIMMIX.
% booktitle is required for this entry type; its wording is inferred from the
% URL path (proceedings/forum2007) -- NOTE(review): confirm the official title.
@inproceedings{schabenberger_growing_2007,
  author    = {Schabenberger, Oliver},
  title     = {Growing Up Fast: {SAS}{\textregistered} 9.2 Enhancements to the {GLIMMIX} Procedure},
  booktitle = {Proceedings of the {SAS} Global Forum 2007 Conference},
  address   = {Orlando, Florida},
  year      = {2007},
  url       = {http://www2.sas.com/proceedings/forum2007/177-2007.pdf},
  abstract  = {The {GLIMMIX} procedure was first released as a Web download add-on procedure to {SAS/STAT}{\textregistered} in {SAS} 9.1 in the 32-bit Windows operating environment. It was subsequently released as a Web download for {SAS} 9.1.3 in {UNIX} and Linux environments. This paper describes the key changes and enhancements to {PROC} {GLIMMIX} between the {SAS} 9.1 and {SAS} 9.2 releases. Enhancements fall into several categories: statistical estimation, model building, post-processing, and miscellaneous other features. For example, new estimation methods and covariance structures enable parameter estimation with reduced bias in more flexible generalized linear mixed models. A new statement provides a comprehensive facility to test hypotheses about covariance parameters. New multiplicity corrections offer more ways to adjust inferences. The goal of this paper is to highlight the new estimation and inference techniques and to demonstrate their usage with examples.}
}


% Pawitan (2000), The American Statistician 54(1): likelihood view of the Wald test failure.
@article{pawitan_reminder_2000,
  author    = {Pawitan, Yudi},
  title     = {A Reminder of the Fallibility of the {Wald} Statistic: Likelihood Explanation},
  journal   = {The American Statistician},
  year      = {2000},
  month     = feb,
  volume    = {54},
  number    = {1},
  pages     = {54--56},
  issn      = {0003-1305},
  doi       = {10.2307/2685612},
  url       = {http://www.jstor.org/stable/2685612},
  abstract  = {The Wald statistic is one of the most commonly used tools in applied statistics, so it is sobering to read Fears, Benichou, and Gail's recent reminder of its fallibility. What makes their example particularly relevant is the fact that the problem is manifest in a simple normal random effects model on a balanced dataset for a seemingly harmless parameter, while, in practice, one tends to rely on the Wald test in complicated or nonnormal models where there are no exact tests to serve as a gold standard. This article explains the failure of the Wald test via the profile likelihood functions, which graphically look decidedly nonnormal. The methodology used to derive the profile likelihood and the discussion of this example could be instructive for a senior or beginning graduate class in theory of statistics.},
  jstornote = {{ArticleType:} primary\_article / Full publication date: Feb., 2000 / Copyright {\copyright} 2000 American Statistical Association}
}


% Skaug and Fournier (2006), Computational Statistics and Data Analysis 51(2).
% {Gaussian} is braced so the proper adjective keeps its capital in every style.
@article{skaug_automatic_2006,
  author  = {Skaug, Hans J. and Fournier, David A.},
  title   = {Automatic approximation of the marginal likelihood in non-{Gaussian} hierarchical models},
  journal = {Computational Statistics \& Data Analysis},
  year    = {2006},
  volume  = {51},
  number  = {2},
  pages   = {699--709}
}

% Skaug (2002), Journal of Computational and Graphical Statistics 11(2).
@article{skaug_automatic_2002,
  author  = {Skaug, Hans J.},
  title   = {Automatic Differentiation to Facilitate Maximum Likelihood Estimation in Nonlinear Random Effects Models},
  journal = {Journal of Computational and Graphical Statistics},
  year    = {2002},
  volume  = {11},
  number  = {2},
  pages   = {458--470},
  issn    = {1061-8600},
  doi     = {10.1198/106186002760180617},
  url     = {http://pubs.amstat.org/doi/abs/10.1198/106186002760180617}
}


% Gelman (2006), Bayesian Analysis 1(3): priors for hierarchical variance parameters.
% Quotation marks in the abstract use TeX ``...'' rather than raw Unicode quotes.
@article{gelman_prior_2006,
  author   = {Gelman, Andrew},
  title    = {Prior distributions for variance parameters in hierarchical models},
  journal  = {Bayesian Analysis},
  year     = {2006},
  volume   = {1},
  number   = {3},
  pages    = {515--533},
  url      = {http://ba.stat.cmu.edu/journal/2006/vol01/issue03/gelman.pdf},
  abstract = {Various noninformative prior distributions have been suggested for scale parameters in hierarchical models. We construct a new folded-noncentral-t family of conditionally conjugate priors for hierarchical standard deviation parameters, and then consider noninformative and weakly informative priors in this family. We use an example to illustrate serious problems with the inverse-gamma family of ``noninformative'' prior distributions. We suggest instead to use a uniform prior on the hierarchical standard deviation, using the half-t family when the number of groups is small and in other settings where a weakly informative prior is desired. We also illustrate the use of the half-t family for hierarchical modeling of multiple variance parameters such as arise in the analysis of variance. Keywords: Bayesian inference, conditional conjugacy, folded-noncentral-t distribution, half-t distribution, hierarchical model, multilevel model, noninformative prior distribution, weakly informative prior distribution}
}

  % lme4 package manual (version given in the note field).
  % {S4} is braced so styles that sentence-case titles do not print "s4".
  @manual{lme4,
    author = {Bates, Douglas and Maechler, Martin},
    title  = {lme4: Linear mixed-effects models using {S4} classes},
    year   = {2010},
    note   = {R package version 0.999375-33},
    url    = {http://CRAN.R-project.org/package=lme4},
  }

  % R language manual; the corporate author is double-braced so it is kept as one name.
  @manual{R,
    author       = {{R Development Core Team}},
    title        = {R: A Language and Environment for Statistical Computing},
    organization = {R Foundation for Statistical Computing},
    address      = {Vienna, Austria},
    year         = {2009},
    note         = {{ISBN} 3-900051-07-0},
    url          = {http://www.R-project.org},
  }


% McCullagh and Nelder (1989), Generalized Linear Models (Chapman and Hall).
@book{McCullaghNelder1989,
  author    = {McCullagh, P. and Nelder, J. A.},
  title     = {Generalized Linear Models},
  publisher = {Chapman and Hall},
  address   = {London},
  year      = {1989}
}


% Ozgul et al. (2009), Ecological Applications 19(3): URTD in gopher tortoises.
@article{ozgul_upper_2009,
  author   = {Ozgul, Arpat and Oli, Madan K. and Bolker, Benjamin M. and {Perez-Heydrich}, Carolina},
  title    = {Upper respiratory tract disease, force of infection, and effects on survival of gopher tortoises},
  journal  = {Ecological Applications},
  year     = {2009},
  month    = apr,
  volume   = {19},
  number   = {3},
  pages    = {786--798},
  issn     = {1051-0761},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/19425439},
  keywords = {Animals, Environment, Female, Florida, Longevity, Male, Mycoplasma, Mycoplasma Infections, Population Density, Population Dynamics, Respiratory Tract Infections, Seroepidemiologic Studies, Sex Factors, Time Factors, Turtles},
  abstract = {Upper respiratory tract disease {(URTD)} caused by Mycoplasma agassizii has been hypothesized to contribute to the decline of some wild populations of gopher tortoises {(Gopherus} polyphemus). However, the force of infection {(FOI)} and the effect of {URTD} on survival in free-ranging tortoise populations remain unknown. Using four years (2003-2006) of mark-recapture and epidemiological data collected from 10 populations of gopher tortoises in central Florida, {USA,} we estimated the {FOI} (probability per year of a susceptible tortoise becoming infected) and the effect of {URTD} (i.e., seropositivity to M. agassizii) on apparent survival rates. Sites with high ({\textgreater} or = 25\%) seroprevalence had substantially higher {FOI} (0.22 +/- 0.03; mean +/- {SE)} than low ({\textless} 25\%) seroprevalence sites (0.04 +/- 0.01). Our results provide the first quantitative evidence that the rate of transmission of M. agassizii is directly related to the seroprevalence of the population. Seropositive tortoises had higher apparent survival (0.99 +/- 0.0001) than seronegatives (0.88 +/- 0.03), possibly because seropositive tortoises represent individuals that survived the initial infection, developed chronic disease, and experienced lower mortality during the four-year span of our study. However, two lines of evidence suggested possible effects of mycoplasmal {URTD} on tortoise survival. First, one plausible model suggested that susceptible (seronegative) tortoises in high seroprevalence sites had lower apparent survival rates than did susceptible tortoises in low seroprevalence sites, indicating a possible acute effect of infection. Second, the number of dead tortoise remains detected during annual site surveys increased significantly with increasing site seroprevalence, from approximately 1 to approximately 5 shell remains per 100 individuals. If (as our results suggest) {URTD} in fact reduces adult survival, it could adversely influence the population dynamics and persistence of this late-maturing, long-lived species.}
}

% Gelman (2005), The Annals of Statistics 33(1).
% The doi field holds the bare identifier; styles add the resolver prefix themselves.
@article{gelman_analysis_2005,
  author  = {Gelman, Andrew},
  title   = {Analysis of variance: why it is more important than ever},
  journal = {The Annals of Statistics},
  year    = {2005},
  volume  = {33},
  number  = {1},
  pages   = {1--53},
  doi     = {10.1214/009053604000001048}
}

% Hadfield (2010), Journal of Statistical Software 33(2): the MCMCglmm R package.
% month uses the predefined macro so styles can localise or abbreviate it.
@article{Hadfield:2009:JSSOBK:v33i02,
  author    = {Hadfield, Jarrod D.},
  title     = {{MCMC} Methods for Multi-Response Generalized Linear Mixed Models: The {MCMCglmm} {R} Package},
  journal   = {Journal of Statistical Software},
  year      = {2010},
  month     = feb,
  day       = {2},
  volume    = {33},
  number    = {2},
  pages     = {1--22},
  issn      = {1548-7660},
  coden     = {JSSOBK},
  url       = {http://www.jstatsoft.org/v33/i02},
  bibdate   = {2009-12-21},
  submitted = {2009-02-18},
  accepted  = {2009-12-21}
}


% Kenward and Roger (1997), Biometrics 53(3): small-sample inference under REML.
@article{kenward_small_1997,
  author   = {Kenward, M. G. and Roger, J. H.},
  title    = {Small sample inference for fixed effects from restricted maximum likelihood},
  journal  = {Biometrics},
  year     = {1997},
  volume   = {53},
  number   = {3},
  pages    = {983--997},
  abstract = {Restricted maximum likelihood {(REML)} is now well established as a method for estimating the parameters of the general Gaussian linear model with a structured covariance matrix, in particular for mixed linear models. Conventionally, estimates of precision and inference for fixed effects are based on their asymptotic distribution, which is known to be inadequate for some small-sample problems. In this paper, we present a scaled Wald statistic, together with an F approximation to its sampling distribution, that is shown to perform well in a range of small sample settings. The statistic uses an adjusted estimator of the covariance matrix that has reduced small sample bias. This approach has the advantage that it reproduces both the statistics and F distributions in those settings where the latter is exact, namely for Hotelling $T^2$ type statistics and for analysis of variance F-ratios. The performance of the modified statistics is assessed through simulation studies of four different {REML} analyses and the methods are illustrated using three examples.}
}

% Schaalje, McBride and Fellingham (2002), JABES volume 7.
% number is the issue within the volume: the URL path .../00000007/00000004/
% identifies volume 7, issue 4.
@article{schaalje_adequacy_2002,
  author   = {Schaalje, G. and {McBride}, J. and Fellingham, G.},
  title    = {Adequacy of Approximations to Distributions of Test Statistics in Complex Mixed Linear Models},
  journal  = {Journal of Agricultural, Biological, and Environmental Statistics},
  year     = {2002},
  volume   = {7},
  number   = {4},
  pages    = {512--524},
  url      = {http://www.ingentaconnect.com/content/asa/jabes/2002/00000007/00000004/art00004},
  keywords = {kenward-roger},
  abstract = {A recent study of lady beetle antennae was a small sample repeated measures design involving a complex covariance structure. Distributions of test statistics based on mixed models fitted to such data are unknown, but two recently developed methods for approximating the distributions of test statistics in mixed linear models have been included as options in the latest release of the {MIXED} procedure of {SAS{\textregistered}.} One method {(FC,} from Fai and Cornelius) computes degrees of freedom of an approximating F distribution for the test statistic using spectral decomposition of the hypothesis matrix together with repeated application of a method for single-degree-of-freedom tests. The other method {(KR,} from Kenward and Roger) adjusts the estimated covariance matrix of the parameter estimates, computes a scale adjustment to the test statistic, and computes the degrees of freedom of an approximating F distribution. Using the two methods, p values for a hypothesis of interest in the lady beetle study were quite different. Simulation studies on the Proc {MIXED} implementation of these methods showed that Type I error rates of both methods are affected by covariance structure complexity, sample size, and imbalance. Nonetheless, the {KR} method performs well in situations with fairly complicated covariance structures when sample sizes are moderate to small and the design is reasonably balanced. The {KR} method should be used in preference to the {FC} method, although it had inflated Type I error rates for complex covariance structures combined with small sample sizes.}
}


% Cordeiro, Paula and Botter (1994), International Statistical Review 62(2).
% The abstract is bilingual (English, then French after "///").
@article{cordeiro_improved_1994,
  author   = {Cordeiro, Gauss M. and Paula, Gilberto A. and Botter, Denise A.},
  title    = {Improved Likelihood Ratio Tests for Dispersion Models},
  journal  = {International Statistical Review / Revue Internationale de Statistique},
  year     = {1994},
  volume   = {62},
  number   = {2},
  pages    = {257--274},
  issn     = {0306-7734},
  doi      = {10.2307/1403512},
  url      = {http://www.jstor.org/stable/1403512},
  abstract = {In this paper we discuss improved likelihood ratio tests for both the parameters in the systematic component and the dispersion parameter in the class of dispersion models (J{\o}rgensen, 1987a). General formulae for the expected likelihood ratio statistic are obtained explicitly in dispersion models, which generalize previous results by Cordeiro (1983, 1985, 1987) and Cordeiro \& Paula (1989a). The practical use of the formulae is that we can derive closed-form Bartlett corrections for these models when the information matrix has a closed-form. Various Bartlett corrections are given for special models. The formulae have advantages for numerical purposes because they require only simple operations on matrices. Algebraically, they may be handled within computer systems such as {REDUCE.} Some numerical examples involving real data clarify the use of these formulae. /// Dans cet article nous \'{e}tudions des crit\`{e}res am\'{e}lior\'{e}s du rapport de vraisemblance aussi bien pour les param\`{e}tres dans la composante syst\'{e}matique que pour le param\`{e}tre de dispersion dans la classe des mod\`{e}les de dispersion (J{\o}rgensen, 1987a). Des formules g\'{e}n\'{e}rales pour la statistique du rapport de vraisemblance prevue sont obtenues explicitement, dans des mod\`{e}les de dispersion, qui g\'{e}n\'{e}ralisent des r\'{e}sultats anterieurs de Cordeiro (1983, 1985, 1987) et Cordeiro \& Paula (1989a). L'utilisation pratique des formules vient de ce qu'on peut d\'{e}river des corrections de Bartlett en forme ferm\'{e}e por ces mod\`{e}les quand la matrice d'information a forme ferm\'{e}e. On donne plusieurs corrections de Bartlett pour des mod\`{e}les particuliers. Les formules ont des avantages pour les calculs num\'{e}riques parce qu'ils n'exigent que des op\'{e}rations simples sur les matrices. Elles peuvent \^{e}tre trait\'{e}es alg\'{e}briquement dans des syst\`{e}mes d'ordinnateurs comme {REDUCE.} Quelques exemples num\'{e}riques, avec des donn\'{e}es r\'{e}elles, illustrent l'usage de ces formules.}
}

% Cordeiro and Ferrari (1998), Journal of Statistical Planning and Inference 71(1-2).
@article{cordeiro_note_1998,
  author   = {Cordeiro, Gauss M. and Ferrari, Silvia L. P.},
  title    = {A note on {Bartlett}-type correction for the first few moments of test statistics},
  journal  = {Journal of Statistical Planning and Inference},
  year     = {1998},
  month    = aug,
  volume   = {71},
  number   = {1-2},
  pages    = {261--269},
  issn     = {0378-3758},
  doi      = {10.1016/S0378-3758(98)00005-6},
  url      = {http://www.sciencedirect.com/science/article/B6V0M-3V5CVRT-M/2/190f68a684dd08c569a7836ff59568e4},
  keywords = {Bartlett correction, Bartlett-type correction, Chi-squared distribution, Method of moments, Score statistics},
  abstract = {The purpose of this paper is to propose a simple method for obtaining Bartlett-type corrections for the first few moments of test statistics which are asymptotically distributed as chi-squared. The method proposed here only requires knowledge of the terms of the expansions to such moments. Some applications of our main result are considered.}
}

% Goldman and Whelan (2000), Molecular Biology and Evolution 17(6).
% Page ranges use "--" so they typeset as an en-dash.
@article{GoldmanWhelan2000,
  author    = {Goldman, Nick and Whelan, Simon},
  title     = {Statistical Tests of Gamma-Distributed Rate Heterogeneity in Models of Sequence Evolution in Phylogenetics},
  journal   = {Molecular Biology and Evolution},
  year      = {2000},
  volume    = {17},
  number    = {6},
  pages     = {975--978},
  owner     = {ben},
  timestamp = {2006.12.06}
}

% Leamer (2010), Journal of Economic Perspectives 24(2).
@article{leamer_tantalus_2010,
  author  = {Leamer, Edward E},
  title   = {Tantalus on the Road to Asymptopia},
  journal = {Journal of Economic Perspectives},
  year    = {2010},
  volume  = {24},
  number  = {2},
  pages   = {31--46},
  issn    = {0895-3309},
  doi     = {10.1257/jep.24.2.31},
  url     = {http://www.aeaweb.org/articles.php?doi=10.1257/jep.24.2.31}
}


% Hurvich and Tsai (1989), Biometrika 76(2): the small-sample corrected AIC (AICC).
@article{hurvich_regression_1989,
  author   = {Hurvich, Clifford M. and Tsai, Chih-Ling},
  title    = {Regression and time series model selection in small samples},
  journal  = {Biometrika},
  year     = {1989},
  month    = jun,
  volume   = {76},
  number   = {2},
  pages    = {297--307},
  doi      = {10.1093/biomet/76.2.297},
  url      = {http://biomet.oxfordjournals.org/content/76/2/297.abstract},
  abstract = {A bias correction to the Akaike information criterion, {AIC,} is derived for regression and autoregressive time series models. The correction is of particular use when the sample size is small, or when the number of fitted parameters is a moderate to large fraction of the sample size. The corrected method, called {AICC,} is asymptotically efficient if the true model is infinite dimensional. Furthermore, when the true model is of finite dimension, {AICC} is found to provide better model order choices than any other asymptotically efficient method. Applications to nonstationary autoregressive and mixed autoregressive moving average time series models are also discussed.}
}

% Richards (2005), Ecology 86(10): information-theoretic model selection in ecology.
@article{richards_testing_2005,
  author  = {Richards, Shane A.},
  title   = {Testing ecological theory using the information-theoretic approach: examples and cautionary results},
  journal = {Ecology},
  year    = {2005},
  volume  = {86},
  number  = {10},
  pages   = {2805--2814},
  doi     = {10.1890/05-0074}
}


% Greven (2008), Cuvillier Verlag book on inference for additive and linear mixed models.
% The umlaut is written as a BibTeX special character ({\"o}).
@book{greven_non-standard_2008,
  author    = {Greven, Sonja},
  title     = {{Non-Standard} Problems in Inference for Additive and Linear Mixed Models},
  publisher = {Cuvillier Verlag},
  address   = {G{\"o}ttingen, Germany},
  year      = {2008},
  isbn      = {3867274916},
  url       = {http://www.cuvillier.de/flycms/en/html/30/-UickI3zKPS,3cEY=/Buchdetails.html?SID=wVZnpL8f0fbc}
}

% Greven and Kneib (2010), Biometrika 97(4): marginal vs. conditional AIC in linear mixed models.
@article{greven_behaviour_2010,
  author   = {Greven, Sonja and Kneib, Thomas},
  title    = {On the Behaviour of Marginal and Conditional {Akaike} Information Criteria in Linear Mixed Models},
  journal  = {Biometrika},
  year     = {2010},
  volume   = {97},
  number   = {4},
  pages    = {773--789},
  url      = {http://www.bepress.com/jhubiostat/paper202/},
  abstract = {In linear mixed models, model selection frequently includes the selection of random effects. Two versions of the Akaike information criterion {(AIC)} have been
used, based either on the marginal or on the conditional distribution. We show that the marginal {AIC} is no longer an asymptotically unbiased estimator of the Akaike information, and in fact favours smaller models without random effects. For the conditional {AIC,} we show that ignoring estimation uncertainty in the random effects covariance matrix, as is common practice, induces a bias that leads to the
selection of any random effect not predicted to be exactly zero. We derive an analytic representation of a corrected version of the conditional {AIC,} which avoids
the high computational cost and imprecision of available numerical approximations.
An implementation in an R package is provided. All theoretical results are
illustrated in simulation studies, and their impact in practice is investigated in an
analysis of childhood malnutrition in Zambia.}
}

% Spiegelhalter et al. (2002), JRSS Series B volume 64: the DIC paper.
% "van der Linde" is given in comma form: as "A. Van der Linde" BibTeX parsed
% "Van" as part of the first name (First = "A. Van", von = "der", Last = "Linde").
@article{spiegelhalter_bayesian_2002,
  author  = {Spiegelhalter, D. J. and Best, N. and Carlin, B. P. and van der Linde, A.},
  title   = {Bayesian measures of model complexity and fit},
  journal = {Journal of the Royal Statistical Society, Series B},
  year    = {2002},
  volume  = {64},
  pages   = {583--640}
}


% Jiang, Rao, Gu and Nguyen (2008), The Annals of Statistics 36(4): fence methods.
% NOTE(review): the co-author list was completed from the published record; confirm against the DOI.
@article{jiang_fence_2008,
  author   = {Jiang, Jiming and Rao, J. Sunil and Gu, Zhonghua and Nguyen, Thuan},
  title    = {Fence methods for mixed model selection},
  journal  = {The Annals of Statistics},
  year     = {2008},
  month    = aug,
  volume   = {36},
  number   = {4},
  pages    = {1669--1692},
  issn     = {0090-5364},
  doi      = {10.1214/07-AOS517},
  url      = {http://projecteuclid.org/euclid.aos/1216237296},
  abstract = {Many model search strategies involve trading off model fit with model complexity in a penalized goodness of fit measure. Asymptotic properties for these types of procedures in settings like linear regression and {ARMA} time series have been studied, but these do not naturally extend to nonstandard situations such as mixed effects models, where simple definition of the sample size is not meaningful. This paper introduces a new class of strategies, known as fence methods, for mixed model selection, which includes linear and generalized linear mixed models. The idea involves a procedure to isolate a subgroup of what are known as correct models (of which the optimal model is a member). This is accomplished by constructing a statistical fence, or barrier, to carefully eliminate incorrect models. Once the fence is constructed, the optimal model is selected from among those within the fence according to a criterion which can be made flexible. In addition, we propose two variations of the fence. The first is a stepwise procedure to handle situations of many predictors; the second is an adaptive approach for choosing a tuning constant. We give sufficient conditions for consistency of fence and its variations, a desirable property for a good model selection procedure. The methods are illustrated through simulation studies and real data analysis.}
}


% Sung and Geyer (2007), The Annals of Statistics 35(3): Monte Carlo likelihood inference.
% Greek letters and dashes in the abstract are LaTeX markup rather than raw Unicode.
@article{sung_monte_2007,
  author   = {Sung, Yun Ju and Geyer, Charles J.},
  title    = {Monte {Carlo} likelihood inference for missing data models},
  journal  = {The Annals of Statistics},
  year     = {2007},
  month    = jul,
  volume   = {35},
  number   = {3},
  pages    = {990--1011},
  issn     = {0090-5364},
  doi      = {10.1214/009053606000001389},
  url      = {http://projecteuclid.org/euclid.aos/1185303995},
  abstract = {We describe a Monte Carlo method to approximate the maximum likelihood estimate {(MLE)}, when there are missing data and the observed data likelihood is not available in closed form. This method uses simulated missing data that are independent and identically distributed and independent of the observed data. Our Monte Carlo approximation to the {MLE} is a consistent and asymptotically normal estimate of the minimizer $\theta^*$ of the {Kullback--Leibler} information, as both Monte Carlo and observed data sample sizes go to infinity simultaneously. Plug-in estimates of the asymptotic variance are provided for constructing confidence regions for $\theta^*$. We give {Logit--Normal} generalized linear mixed model examples, calculated using an R package.}
}

% Booth and Hobert (1999), JRSS Series B 61(1): automated Monte Carlo EM for GLMMs.
@article{booth_maximizing_1999,
  author   = {Booth, James G. and Hobert, James P.},
  title    = {Maximizing Generalized Linear Mixed Model Likelihoods with an Automated {Monte} {Carlo} {EM} Algorithm},
  journal  = {Journal of the Royal Statistical Society, Series B},
  year     = {1999},
  volume   = {61},
  number   = {1},
  pages    = {265--285},
  doi      = {10.1111/1467-9868.00176},
  url      = {http://links.jstor.org/sici?sici=1369-7412(1999)61%3A1%3C265%3AMGLMML%3E2.0.CO%3B2-C},
  keywords = {glmm}
}

% Booth et al. (2003), Statistical Modelling 3(3): negative binomial loglinear mixed models.
% The DOI embedded in the resolver URL is also stored bare in the doi field.
@article{booth_negative_2003,
  author   = {Booth, James and Casella, George and Friedl, Herwig and Hobert, James},
  title    = {Negative binomial loglinear mixed models},
  journal  = {Statistical Modelling},
  year     = {2003},
  volume   = {3},
  number   = {3},
  pages    = {179--191},
  doi      = {10.1191/1471082x03st058oa},
  url      = {http://dx.doi.org/10.1191/1471082x03st058oa},
  keywords = {glmm}
}


% Rue, Martino and Chopin (2009), JRSS Series B 71(2): the INLA paper.
% NOTE(review): the title was completed from the published record (the entry had only its tail); confirm.
@article{Rue+2009,
  author  = {Rue, H. and Martino, S. and Chopin, N.},
  title   = {Approximate {Bayesian} inference for latent {Gaussian} models by using integrated nested {Laplace} approximations (with discussion)},
  journal = {Journal of the Royal Statistical Society, Series B},
  year    = {2009},
  volume  = {71},
  number  = {2},
  pages   = {319--392}
}


@article{schielzeth_simple_2010,
	title = {Simple means to improve the interpretability of regression coefficients},
	url = {http://dx.doi.org/10.1111/j.2041-210X.2010.00012.x},
	doi = {10.1111/j.2041-210X.2010.00012.x},
	abstract = {1. Linear regression models are an important statistical tool in evolutionary and ecological studies. Unfortunately, these models often yield some uninterpretable estimates and hypothesis tests, especially when models contain interactions or polynomial terms. Furthermore, the standard errors for treatment groups, although often of interest for including in a publication, are not directly available in a standard linear model. 2. Centring and standardization of input variables are simple means to improve the interpretability of regression coefficients. Further, refitting the model with a slightly modified model structure allows extracting the appropriate standard errors for treatment groups directly from the model. 3. Centring will make main effects biologically interpretable even when involved in interactions and thus avoids the potential misinterpretation of main effects. This also applies to the estimation of linear effects in the presence of polynomials. Categorical input variables can also be centred and this sometimes assists interpretation. 4. Standardization (z-transformation) of input variables results in the estimation of standardized slopes or standardized partial regression coefficients. Standardized slopes are comparable in magnitude within models as well as between studies. They have some advantages over partial correlation coefficients and are often the more interesting standardized effect size. 5. The thoughtful removal of intercepts or main effects allows extracting treatment means or treatment slopes and their appropriate standard errors directly from a linear model. This provides a simple alternative to the more complicated calculation of standard errors from contrasts and main effects. 6. The simple methods presented here put the focus on parameter estimation (point estimates as well as confidence intervals) rather than on significance thresholds. They allow fitting complex, but meaningful models that can be concisely presented and interpreted. The presented methods can also be applied to generalised linear models {(GLM)} and linear mixed models.},
	volume = {1},
	number = {2},
	pages = {103--113},
	journal = {Methods in Ecology and Evolution},
	author = {Schielzeth, Holger},
	year = {2010}
}


@inproceedings{venables_exegeses_1998,
  author    = {Venables, W. N},
  title     = {Exegeses on Linear Models},
  booktitle = {1998 International {S-PLUS} User Conference},
  address   = {Washington, {DC}},
  year      = {1998},
  url       = {http://www.stats.ox.ac.uk/pub/MASS3/Exegeses.pdf},
}

@article{whittingham_why_2006,
  author  = {Whittingham, Mark J. and Stephens, Philip A. and Bradbury, Richard B. and Freckleton, Robert P.},
  title   = {Why do we still use stepwise modelling in ecology and behaviour?},
  journal = {Journal of Animal Ecology},
  year    = {2006},
  volume  = {75},
  number  = {5},
  pages   = {1182--1189},
}


@book{harrell_regression_2001,
  author    = {Harrell, Frank},
  title     = {Regression Modeling Strategies},
  publisher = {Springer},
  year      = {2001},
  isbn      = {0387952322},
}

@book{hardin_generalized_2007,
  author    = {Hardin, James William and Hilbe, Joseph},
  title     = {Generalized linear models and extensions},
  publisher = {Stata Press},
  year      = {2007},
  month     = feb,
  isbn      = {9781597180146},
}


@article{robinson_that_1991,
  author     = {Robinson, G. K.},
  title      = {That {BLUP} is a Good Thing: The Estimation of Random Effects},
  shorttitle = {That {BLUP} is a Good Thing},
  journal    = {Statistical Science},
  year       = {1991},
  month      = feb,
  volume     = {6},
  number     = {1},
  pages      = {15--32},
  issn       = {0883-4237},
  url        = {http://www.jstor.org/stable/2245695},
  abstract   = {In animal breeding, Best Linear Unbiased Prediction, or {BLUP}, is a technique for estimating genetic merits. In general, it is a method of estimating random effects. It can be used to derive the Kalman filter, the method of Kriging used for ore reserve estimation, credibility theory used to work out insurance premiums, and Hoadley's quality measurement plan used to estimate a quality index. It can be used for removing noise from images and for small-area estimation. This paper presents the theory of {BLUP}, some examples of its application and its relevance to the foundations of statistics. Understanding of procedures for estimating random effects should help people to understand some complicated and controversial issues about fixed and random effects models and also help to bridge the apparent gulf between the Bayesian and Classical schools of thought.},
}

@book{GotelliEllison2004,
  author    = {Nicholas J. Gotelli and Aaron M. Ellison},
  title     = {A Primer of Ecological Statistics},
  publisher = {Sinauer},
  address   = {Sunderland, MA},
  year      = {2004},
}


@article{bolker_generalized_2009,
	title = {Generalized linear mixed models: a practical guide for ecology and evolution},
	volume = {24},
	number = {3},
	issn = {0169-5347},
	shorttitle = {Generalized linear mixed models},
	url = {http://www.sciencedirect.com/science/article/B6VJ1-4VGKHJP-1/2/35970065c78c14ad30bf71bd1d5b452e},
	doi = {10.1016/j.tree.2008.10.008},
	abstract = {How should ecologists and evolutionary biologists analyze nonnormal data that involve random effects? Nonnormal data such as counts or proportions often defy classical statistical procedures. Generalized linear mixed models {(GLMMs)} provide a more flexible approach for analyzing nonnormal data when random effects are present. The explosion of research on {GLMMs} in the last decade has generated considerable uncertainty for practitioners in ecology and evolution. Despite the availability of accurate techniques for estimating {GLMM} parameters in simple cases, complex {GLMMs} are challenging to fit and statistical inference such as hypothesis testing remains difficult. We review the use (and misuse) of {GLMMs} in ecology and evolution, discuss estimation and inference and summarize `best-practice' data analysis procedures for scientists facing this challenge.},
	journal = {Trends in Ecology \& Evolution},
	author = {Bolker, Benjamin M. and Brooks, Mollie E. and Clark, Connie J. and Geange, Shane W. and Poulsen, John R. and Stevens, M. Henry H. and White, {Jada-Simone} S.},
	year = {2009},
	pages = {127--135}
}


@article{latimer_hierarchical_2009,
	title = {Hierarchical models facilitate spatial analysis of large data sets: a case study on invasive plant species in the northeastern United States},
	volume = {12},
	shorttitle = {Hierarchical models facilitate spatial analysis of large data sets},
	number = {2},
	journal = {Ecology Letters},
	author = {Latimer, A. M. and Banerjee, S. and Sang, H. and Mosher, E. S. and Silander, Jr., J. A.},
	year = {2009},
	pages = {144--154}
}

@article{van2009simple,
  author    = {van de Pol, M. and Wright, J.},
  title     = {A simple method for distinguishing within-versus between-subject effects using mixed models},
  journal   = {Animal Behaviour},
  publisher = {Elsevier},
  year      = {2009},
  volume    = {77},
  number    = {3},
  pages     = {753--758},
}


@article{ohara_not_2010,
  author   = {{O'Hara}, Robert B. and Kotze, D. Johan},
  title    = {Do not log-transform count data},
  journal  = {Methods in Ecology and Evolution},
  year     = {2010},
  month    = jun,
  volume   = {1},
  number   = {2},
  pages    = {118--122},
  issn     = {{2041-210X}},
  doi      = {10.1111/j.2041-210X.2010.00021.x},
  url      = {http://onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2010.00021.x/abstract},
  keywords = {generalized linear models, Linear Models, overdispersion, Poisson, transformation},
  abstract = {1. Ecological count data (e.g. number of individuals or species) are often log-transformed to satisfy parametric test assumptions.},
}


@article{warton_arcsine_2011,
	title = {The arcsine is asinine: the analysis of proportions in ecology},
	volume = {92},
	number = {1},
	issn = {0012-9658},
	shorttitle = {The arcsine is asinine},
	url = {http://www.esajournals.org/doi/full/10.1890/10-0340.1},
	doi = {10.1890/10-0340.1},
	journal = {Ecology},
	author = {Warton, David I. and Hui, Francis K. C.},
	month = jan,
	year = {2011},
	pages = {3--10}
}

@Article{WilkinsonRogers1973,
  author  = {G. N. Wilkinson and C. E. Rogers},
  title   = {Symbolic Description of Factorial Models for Analysis of Variance},
  journal = {Applied Statistics},
  year    = {1973},
  volume  = {22},
  number  = {3},
  pages   = {392--399},
  doi     = {10.2307/2346786}
}

@book{gelman_data_2006,
  author    = {Gelman, Andrew and Hill, Jennifer},
  title     = {Data Analysis Using Regression and {Multilevel/Hierarchical} Models},
  publisher = {Cambridge University Press},
  address   = {Cambridge, England},
  year      = {2006},
  url       = {http://www.stat.columbia.edu/~gelman/arm/},
  keywords  = {uploaded},
}

@book{Crawley2002,
  author    = {Michael J. Crawley},
  title     = {Statistical Computing: An Introduction to Data Analysis using {S-PLUS}},
  publisher = {John Wiley \& Sons},
  year      = {2002},
  isbn      = {0-471-56040-5},
}


@article{banta_comprehensive_2010,
  author   = {Banta, Joshua A. and Stevens, Martin H. H. and Pigliucci, Massimo},
  title    = {A comprehensive test of the 'limiting resources' framework applied to plant tolerance to apical meristem damage},
  journal  = {Oikos},
  year     = {2010},
  month    = feb,
  volume   = {119},
  number   = {2},
  pages    = {359--369},
  issn     = {1600-0706},
  doi      = {10.1111/j.1600-0706.2009.17726.x},
  url      = {http://onlinelibrary.wiley.com/doi/10.1111/j.1600-0706.2009.17726.x/abstract},
  abstract = {Tolerance to apical meristem damage {(AMD)} is a form of plant defense against herbivory. Theoretical models come to different conclusions about the effects of inorganic soil nutrient levels on tolerance to {AMD}, and different plants have shown different relationships between these variables. To assign some order to these disparate patterns and to resolve conflicts among the models, the ‘limiting resources model’ {(LRM)} was developed. However, we believe that the {LRM} is actually comprised of several different models, which we describe. Our study marks the first comprehensive and simultaneous test of the entire {LRM} framework, treating it explicitly as separate models, which also evaluates the models’ underlying assumptions. We studied tolerance to {AMD} in laboratory-reared natural populations of Arabidopsis thaliana from three different regions of Europe, spanning a wide latitudinal gradient. We show that, in different populations of this species, basic responses to nutrients and damage are best described by different models, which are based on different assumptions and make different predictions. This demonstrates the need for complexity in our explanations, and suggests that no one existing model can account for all relationships between tolerance to {AMD} and nutrients. Our results also demonstrate that fruit production can provide a misleading approximation of fitness in A. thaliana, contrary to the common assumption in the literature.},
}

@Article{JohnsonRaven1973,
  author  = {Michael P. Johnson and Peter H. Raven},
  title   = {Species Number and Endemism: The {Gal{\'a}pagos} Archipelago Revisited},
  journal = {Science},
  year    = {1973},
  volume  = {179},
  number  = {4076},
  pages   = {893--895},
  doi     = {10.1126/science.179.4076.893},
  url     = {http://www.sciencemag.org/content/179/4076/893.short}
}


@article{pregibon_goodness_1980,
	title = {Goodness of Link Tests for Generalized Linear Models},
	volume = {29},
	issn = {0035-9254},
	url = {http://www.jstor.org/stable/2346405},
	doi = {10.2307/2346405},
	abstract = {Data analytic procedures are proposed to examine the adequacy of the hypothesized link used in fitting a generalized linear model. Through model expansion and linearization, tests and estimation techniques are provided. These procedures, along with the release of {GLIM3}, enable the user to examine routinely and objectively the fit of an hypothesized model. Examples are presented to illustrate the testing and fitting procedure.},
	number = {1},
	journal = {Journal of the Royal Statistical Society. Series C (Applied Statistics)},
	author = {Pregibon, Daryl},
	month = jan,
	year = {1980},
	pages = {15--24}
}


@ARTICLE{Tiwari+2006,
  author = {Tiwari, Manjula and Bjorndal, Karen A. and Bolten, Alan B. and Bolker, Benjamin M.},
  title = {Evaluation of density-dependent processes and green turtle \emph{Chelonia mydas} hatchling production at {Tortuguero}, {Costa Rica}},
  journal = {Marine Ecology Progress Series},
  year = {2006},
  volume = {326},
  pages = {283--293}
}


@article{schelldorfer_glmmlasso:_2011,
	title = {{GLMMLasso}: An Algorithm for {High-Dimensional} Generalized Linear Mixed Models Using {L1-Penalization}},
	shorttitle = {{GLMMLasso}},
	journal = {Arxiv preprint {arXiv:1109.4003}},
	author = {Schelldorfer, J. and Bühlmann, P.},
	year = {2011},
	eprint = {1109.4003},
	archiveprefix = {arXiv},
	url = {http://arxiv.org/abs/1109.4003}
}


@article{zhang_fitting_2011,
	title = {On fitting generalized linear mixed-effects models for binary responses using different statistical packages},
	volume = {30},
	number = {20},
	pages = {2562--2572},
	issn = {1097-0258},
	url = {http://onlinelibrary.wiley.com/doi/10.1002/sim.4265/abstract},
	doi = {10.1002/sim.4265},
	year = {2011},
	abstract = {The generalized linear mixed-effects model {(GLMM)} is a popular paradigm to extend models for cross-sectional data to a longitudinal setting. When applied to modeling binary responses, different software packages and even different procedures within a package may give quite different results. In this report, we describe the statistical approaches that underlie these different procedures and discuss their strengths and weaknesses when applied to fit correlated binary responses. We then illustrate these considerations by applying these procedures implemented in some popular software packages to simulated and real study data. Our simulation results indicate a lack of reliability for most of the procedures considered, which carries significant implications for applying such popular software packages in practice. Copyright © 2011 John Wiley \& Sons, Ltd.},
	journal = {Statistics in Medicine},
	author = {Zhang, Hui and Lu, Naiji and Feng, Changyong and Thurston, Sally W. and Xia, Yinglin and Zhu, Liang and Tu, Xin M.},
	keywords = {{GLIMMIX}, integral approximation, linearization, lme4, {NLMIXED}, R, {SAS}, {ZELIG}}
}


@article{self_asymptotic_1987,
  author  = {Self, Steven G. and Liang, Kung-Yee},
  title   = {Asymptotic Properties of Maximum Likelihood Estimators and Likelihood Ratio Tests under Nonstandard Conditions},
  journal = {Journal of the American Statistical Association},
  year    = {1987},
  month   = jun,
  volume  = {82},
  number  = {398},
  pages   = {605--610},
  issn    = {0162-1459, 1537-{274X}},
  doi     = {10.1080/01621459.1987.10478472},
  url     = {http://amstat.tandfonline.com/doi/abs/10.1080/01621459.1987.10478472#.UpNeOVQ9sVI},
  urldate = {2013-11-25},
}


@article{davis_semiparametric_1991,
  author   = {Davis, Charles S},
  title    = {Semi-parametric and non-parametric methods for the analysis of repeated measurements with applications to clinical trials},
  journal  = {Statistics in Medicine},
  year     = {1991},
  month    = dec,
  volume   = {10},
  number   = {12},
  pages    = {1959--1980},
  issn     = {1097-0258},
  doi      = {10.1002/sim.4780101210},
  url      = {http://onlinelibrary.wiley.com/doi/10.1002/sim.4780101210/abstract},
  abstract = {Techniques applicable for the analysis of longitudinal data when the response variable is non-normal are not nearly as comprehensive as for normally-distributed outcomes. However, there have been several recent developments. Semi-parametric and non-parametric methodology for the analysis of repeated measurements is reviewed. The commonly encountered design in which, for each subject, one assesses a univariate response variable at multiple fixed time points, is considered. The types of outcomes considered include binary, ordered categorical, and continuous (but extremely non-normal) response variables. All of the methods considered allow for incomplete data due to the occurrence of missing observations. In addition, discrete and/or continuous covariates, which may be time-dependent, are accommodated by some of the approaches. The methods are demonstrated using data from three clinical trials.},
}


@article{fournier_ad_2011,
	title = {{AD} Model Builder: using automatic differentiation for statistical inference of highly parameterized complex nonlinear models},
	issn = {1055-6788},
	shorttitle = {{AD} Model Builder},
	url = {http://www.tandfonline.com/doi/abs/10.1080/10556788.2011.597854},
	doi = {10.1080/10556788.2011.597854},
	abstract = {Many criteria for statistical parameter estimation, such as maximum likelihood, are formulated as a nonlinear optimization problem. Automatic Differentiation Model Builder {(ADMB)} is a programming framework based on automatic differentiation, aimed at highly nonlinear models with a large number of parameters. The benefits of using {AD} are computational efficiency and high numerical accuracy, both crucial in many practical problems. We describe the basic components and the underlying philosophy of {ADMB}, with an emphasis on functionality found in no other statistical software. One example of such a feature is the generic implementation of Laplace approximation of high-dimensional integrals for use in latent variable models. We also review the literature in which {ADMB} has been used, and discuss future development of {ADMB} as an open source project. Overall, the main advantages of {ADMB} are flexibility, speed, precision, stability and built-in methods to quantify uncertainty.},
	journal = {Optimization Methods and Software},
	author = {Fournier, David A. and Skaug, Hans J. and Ancheta, Johnoel and Ianelli, James and Magnusson, Arni and Maunder, Mark N. and Nielsen, Anders and Sibert, John},
	year = {2011},
	pages = {1--17}
}


@misc{plummer_jags:_2003,
  author     = {Plummer, Martyn},
  title      = {{JAGS:} A program for analysis of {Bayesian} graphical models using {Gibbs} sampling},
  shorttitle = {{JAGS}},
  year       = {2003},
  url        = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.13.3406},
}

  @manual{blme,
    author = {Vincent Dorie},
    title  = {blme: Bayesian Linear Mixed-Effects models},
    year   = {2011},
    note   = {R package version 0.01-4},
    url    = {http://CRAN.R-project.org/package=blme},
  }

@book{Wilkinson1999,
  author    = {Leland Wilkinson},
  title     = {The grammar of graphics},
  publisher = {Springer},
  address   = {New York},
  year      = {1999},
}

 @book{wickham2009,
   title     = {{ggplot2}: elegant graphics for data analysis},
   author    = {Hadley Wickham},
   year      = {2009},
   publisher = {Springer New York},
   isbn      = {978-0-387-98140-6},
   url       = {http://had.co.nz/ggplot2/book},
 }

@article{murtaugh_simplicity_2007,
  author  = {Murtaugh, Paul A},
  title   = {Simplicity and Complexity in Ecological Data Analysis},
  journal = {Ecology},
  year    = {2007},
  volume  = {88},
  number  = {1},
  pages   = {56--62},
  url     = {http://www.esajournals.org/doi/abs/10.1890/0012-9658%282007%2988%5B56%3ASACIED%5D2.0.CO%3B2},
}

@article{dormann_methods_2007,
	title = {Methods to account for spatial autocorrelation in the analysis of species distributional data: a review},
	volume = {30},
	url = {http://dx.doi.org/10.1111/j.2007.0906-7590.05171.x},
	doi = {10.1111/j.2007.0906-7590.05171.x},
	abstract = {Species distributional or trait data based on range map (extent-of-occurrence) or atlas survey data often display spatial autocorrelation, i.e. locations close to each other exhibit more similar values than those further apart. If this pattern remains present in the residuals of a statistical model based on such data, one of the key assumptions of standard statistical analyses, that residuals are independent and identically distributed (i.i.d), is violated. The violation of the assumption of i.i.d. residuals may bias parameter estimates and can increase type I error rates (falsely rejecting the null hypothesis of no effect). While this is increasingly recognised by researchers analysing species distribution data, there is, to our knowledge, no comprehensive overview of the many available spatial statistical methods to take spatial autocorrelation into account in tests of statistical significance. Here, we describe six different statistical approaches to infer correlates of species' distributions, for both presence/absence (binary response) and species abundance data (poisson or normally distributed response), while accounting for spatial autocorrelation in model residuals: autocovariate regression; spatial eigenvector mapping; generalised least squares; (conditional and simultaneous) autoregressive models and generalised estimating equations. A comprehensive comparison of the relative merits of these methods is beyond the scope of this paper. To demonstrate each method's implementation, however, we undertook preliminary tests based on simulated data. These preliminary tests verified that most of the spatial modeling techniques we examined showed good type I error control and precise parameter estimates, at least when confronted with simplistic simulated data containing spatial autocorrelation in the errors. However, we found that for presence/absence data the results and conclusions were very variable between the different methods. This is likely due to the low information content of binary maps. Also, in contrast with previous studies, we found that autocovariate methods consistently underestimated the effects of environmental controls of species distributions. Given their widespread use, in particular for the modelling of species presence/absence data (e.g. climate envelope models), we argue that this warrants further study and caution in their use. To aid other ecologists in making use of the methods described, code to implement them in freely available software is provided in an electronic appendix.},
	number = {5},
	journal = {Ecography},
	author = {Dormann, Carsten F. and McPherson, Jana M. and Araújo, Miguel B. and Bivand, Roger and Bolliger, Janine and Carl, Gudrun and Davies, Richard G. and Hirzel, Alexandre and Jetz, Walter and Kissling, W. Daniel and Kühn, Ingolf and Ohlemüller, Ralf and Peres-Neto, Pedro R. and Reineking, Björn and Schröder, Boris and Schurr, Frank M. and Wilson, Robert},
	year = {2007},
	pages = {609--628}
}


@article{ponciano_hierarchical_2009,
  author     = {Ponciano, José Miguel and Taper, Mark L. and Dennis, Brian and Lele, Subhash R.},
  title      = {Hierarchical Models in Ecology: Confidence Intervals, Hypothesis Testing, and Model Selection Using Data Cloning},
  shorttitle = {Hierarchical Models in Ecology},
  journal    = {Ecology},
  year       = {2009},
  month      = feb,
  volume     = {90},
  number     = {2},
  pages      = {356--362},
  issn       = {0012-9658},
  url        = {http://www.jstor.org/stable/27650990},
  urldate    = {2012-02-03},
  abstract   = {Hierarchical statistical models are increasingly being used to describe complex ecological processes. The data cloning ({DC)} method is a new general technique that uses Markov chain Monte Carlo ({MCMC)} algorithms to compute maximum likelihood ({ML)} estimates along with their asymptotic variance estimates for hierarchical models. Despite its generality, the method has two inferential limitations. First, it only provides Wald-type confidence intervals, known to be inaccurate in small samples. Second, it only yields {ML} parameter estimates, but not the maximized likelihood values used for profile likelihood intervals, likelihood ratio hypothesis tests, and information-theoretic model selection. Here we describe how to overcome these inferential limitations with a computationally efficient method for calculating likelihood ratios via data cloning. The ability to calculate likelihood ratios allows one to do hypothesis tests, construct accurate confidence intervals and undertake information-based model selection with hierarchical models in a frequentist context. To demonstrate the use of these tools with complex ecological models, we reanalyze part of Gause's classic Paramecium data with state—space population models containing both environmental noise and sampling error. The analysis results include improved confidence intervals for parameters, a hypothesis test of laboratory replication, and a comparison of the Beverton-Holt and the Ricker growth forms based on a model selection index.},
}


@article{pinheiro_unconstrained_1996,
  author   = {Pinheiro, José C. and Bates, Douglas M.},
  title    = {Unconstrained parametrizations for variance-covariance matrices},
  journal  = {Statistics and Computing},
  year     = {1996},
  volume   = {6},
  number   = {3},
  pages    = {289--296},
  doi      = {10.1007/BF00140873},
  url      = {http://dx.doi.org/10.1007/BF00140873},
  urldate  = {2010-01-05},
  abstract = {The estimation of variance-covariance matrices through optimization of an objective function, such as a log-likelihood function, is usually a difficult numerical problem. Since the estimates should be positive semi-definite matrices, we must use constrained optimization, or employ a parametrization that enforces this condition. We describe here five different parametrizations for variance-covariance matrices that ensure positive definiteness, thus leaving the estimation problem unconstrained. We compare the parametrizations based on their computational efficiency and statistical interpretability. The results described here are particularly useful in maximum likelihood and restricted maximum likelihood estimation in linear and non-linear mixed-effects models, but are also applicable to other areas of statistics.},
}


@article{stram_variance_1994,
  author  = {Stram, Daniel O and Lee, Jae Won},
  title   = {Variance Components Testing in the Longitudinal Fixed Effects Model},
  journal = {Biometrics},
  year    = {1994},
  volume  = {50},
  number  = {4},
  pages   = {1171--1177},
  url     = {http://links.jstor.org/sici?sici=0006-341X%28199412%2950%3A4%3C1171%3AVCTITL%3E2.0.CO%3B2-H},
}

@article{mckeon_multiple_2012,
  author     = {McKeon, C. Seabird and Stier, Adrian and McIlroy, Shelby and Bolker, Benjamin},
  title      = {Multiple defender effects: synergistic coral defense by mutualist crustaceans},
  shorttitle = {Multiple defender effects},
  journal    = {Oecologia},
  year       = {2012},
  volume     = {169},
  number     = {4},
  pages      = {1095--1103},
  issn       = {0029-8549},
  doi        = {10.1007/s00442-012-2275-2},
  url        = {http://www.springerlink.com/content/nm20758r6557v448/abstract/},
  urldate    = {2012-10-18},
  keywords   = {Biomedical and Life Sciences},
  abstract   = {The majority of our understanding of mutualisms comes from studies of pairwise interactions. However, many hosts support mutualist guilds, and interactions among mutualists make the prediction of aggregate effects difficult. Here, we apply a factorial experiment to interactions of ‘guard’ crustaceans that defend their coral host from seastar predators. Predation was reduced by the presence of mutualists (15\% reduction in predation frequency and 45\% in volume of coral consumed). The frequency of attacks with both mutualists was lower than with a single species, but it did not differ significantly from the expected frequency of independent effects. In contrast, the combined defensive efficacy of both mutualist species reduced the volume of coral tissue lost by 73\%, significantly more than the 38\% reduction expected from independent defensive efforts, suggesting the existence of a cooperative synergy in defensive behaviors of ‘guard’ crustaceans. These emergent ‘multiple defender effects’ are statistically and ecologically analogous to the emergent concept of ‘multiple predator effects’ known from the predation literature.},
}

@book{dobson_introduction_2008,
	edition = {Third},
	title = {An Introduction to Generalized Linear Models},
	isbn = {1584889500},
	publisher = {Chapman and {Hall/CRC}},
	author = {Dobson, Annette J. and Barnett, Adrian},
	month = may,
	year = {2008}
}

@incollection{myers_appendix_2010,
	title = {Appendix {A.6}: Computational Details for {GLMs} for a Noncanonical Link},
	copyright = {Copyright © 2010 John Wiley \& Sons, Inc. All rights reserved.},
	isbn = {9780470556986},
	shorttitle = {Appendix A.6},
	url = {http://onlinelibrary.wiley.com/doi/10.1002/9780470556986.app6/summary},
	language = {en},
	urldate = {2013-09-25},
	booktitle = {Generalized Linear Models},
	publisher = {John Wiley \& Sons, Inc.},
	author = {Myers, Raymond H. and Montgomery, Douglas C. and Vining, G. Geoffrey and Robinson, Timothy J.},
	year = {2010},
	pages = {481--483}
}


@article{marschner_glm2:_2011,
	title = {glm2: Fitting Generalized Linear Models with Convergence Problems},
	volume = {3},
	url = {http://journal.r-project.org/archive/2011-2/RJournal_2011-2_Marschner.pdf},
	number = {2},
	journal = {The R Journal},
	author = {Marschner, Ian C.},
	month = dec,
	year = {2011},
	pages = {12--15}
}


@Article{Kampstra:2008:CSB,
  author =       {Peter Kampstra},
  title =        {Code Snippet: {{\tt Beanplot}}: {A} Boxplot
                 Alternative for Visual Comparison of Distributions},
  journal =      {Journal of Statistical Software},
  volume =       {28},
  number =       {CS-1},
  pages =        {1--9},
  month =        nov,
  year =         {2008},
  CODEN =        {JSSOBK},
  ISSN =         {1548-7660},
  bibdate =      {Wed Aug 25 09:57:41 MDT 2010},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/jstatsoft.bib},
  URL =          {http://www.jstatsoft.org/v28/c01},
  acknowledgement = {ack-nhfb},
  fjournal =     {Journal of Statistical Software},
  pubdates =     {Submitted 2008-09-19; Accepted 2008-10-28},
}


@article{augustin_quantile_2012,
	title = {On quantile quantile plots for generalized linear models},
	volume = {56},
	issn = {0167-9473},
	url = {http://linkinghub.elsevier.com/retrieve/pii/S0167947312000692},
	doi = {10.1016/j.csda.2012.01.026},
	number = {8},
	urldate = {2013-07-01},
	journal = {Computational Statistics \& Data Analysis},
	author = {Augustin, Nicole H. and Sauleau, Erik-André and Wood, Simon N.},
	month = aug,
	year = {2012},
	pages = {2404--2409}
}


@article{hoaglin_poissonness_1980,
  author   = {Hoaglin, David C.},
  title    = {A {Poissonness} Plot},
  journal  = {The American Statistician},
  year     = {1980},
  volume   = {34},
  number   = {3},
  pages    = {146--149},
  issn     = {0003-1305},
  doi      = {10.2307/2683871},
  url      = {http://www.jstor.org/stable/2683871},
  urldate  = {2012-04-24},
  abstract = {A graphical technique, similar in spirit to probability plotting, can be used to judge whether a Poisson model is appropriate for an observed frequency distribution. This {"Poissonness} plot" can equally be applied to truncated Poisson situations. It provides a type of robustness for detecting isolated discrepancies in otherwise well-behaved frequency distributions.},
}


@article{cessie_goodness_1991,
	title = {A Goodness-of-Fit Test for Binary Regression Models, Based on Smoothing Methods},
	volume = {47},
	copyright = {Copyright © 1991 International Biometric Society},
	issn = {0006-341X},
	url = {http://www.jstor.org/stable/2532385},
	doi = {10.2307/2532385},
	abstract = {A new global test statistic for models with continuous covariates and binary response is introduced. The test statistic is based on nonparametric kernel methods. Explicit expressions are given for the mean and variance of the test statistic. Asymptotic properties are considered and approximate corrections due to parameter estimation are presented. Properties of the test statistic are studied by simulation. The goodness-of-fit method is illustrated on data from a Dutch follow-up study on preterm infants. Recommendations for practitioners are given.},
	number = {4},
	urldate = {2013-10-02},
	journal = {Biometrics},
	author = {{le Cessie}, S. and {van Houwelingen}, J. C.},
	month = dec,
	year = {1991},
	pages = {1267--1282}
}


@misc{leek_researcher_2013,
	title = {The researcher degrees of freedom - recipe tradeoff in data analysis},
	url = {http://simplystatistics.org/2013/07/31/the-researcher-degrees-of-freedom-recipe-tradeoff-in-data-analysis/},
	urldate = {2013-10-02},
	howpublished = {Simply Statistics},
	author = {Leek, Jeff and Peng, Roger and Irizarry, Rafa},
	month = jul,
	year = {2013}
}

@misc{leek_deterministic_2012,
	title = {A deterministic statistical machine},
	url = {http://simplystatistics.org/2012/08/27/a-deterministic-statistical-machine/},
	urldate = {2013-10-02},
	howpublished = {Simply Statistics},
	author = {Leek, Jeff and Peng, Roger and Irizarry, Rafa},
	month = aug,
	year = {2012}
}


@article{simmons_false-positive_2011,
	title = {False-Positive Psychology: Undisclosed Flexibility in Data Collection and Analysis Allows Presenting Anything as Significant},
	volume = {22},
	issn = {0956-7976, 1467-9280},
	url = {http://pss.sagepub.com/content/22/11/1359},
	doi = {10.1177/0956797611417632},
	abstract = {In this article, we accomplish two things. First, we show that despite empirical psychologists’ nominal endorsement of a low rate of false-positive findings (≤ .05), flexibility in data collection, analysis, and reporting dramatically increases actual false-positive rates. In many cases, a researcher is more likely to falsely find evidence that an effect exists than to correctly find evidence that it does not. We present computer simulations and a pair of actual experiments that demonstrate how unacceptably easy it is to accumulate (and report) statistically significant evidence for a false hypothesis. Second, we suggest a simple, low-cost, and straightforwardly effective disclosure-based solution to this problem. The solution involves six concrete requirements for authors and four guidelines for reviewers, all of which impose a minimal burden on the publication process.},
	language = {en},
	number = {11},
	urldate = {2012-05-10},
	journal = {Psychological Science},
	author = {Simmons, Joseph P. and Nelson, Leif D. and Simonsohn, Uri},
	month = nov,
	year = {2011},
	keywords = {disclosure, methodology, motivated reasoning, publication},
	pages = {1359--1366}
}

@article{lesnoff_within-herd_2004,
	title = {Within-herd spread of contagious bovine pleuropneumonia in {Ethiopian} highlands},
	volume = {64},
	issn = {0167-5877},
	url = {http://www.sciencedirect.com/science/article/pii/S0167587704000856},
	doi = {10.1016/j.prevetmed.2004.03.005},
	abstract = {Contagious bovine pleuropneumonia ({CBPP}) is a major threat for cattle health and production in Africa. This disease is caused by the small-colony type of Mycoplasma mycoides subspecies mycoides ({MmmSC}). Transmission occurs from direct and repeated contacts between sick and healthy animals. Veterinary services recently reported a resurgence of {CBPP} in the province of West Wellega, in the Ethiopian highlands. A research program was set up to estimate the epidemiological parameters of the within-herd infection spread. A follow-up survey was implemented in 71 sampled herds of the Boji district (West Wellega province). Fifteen herds were classified as newly infected and used in a serological- and clinical-incidence study. The overall 16-month cumulative sero-incidence risk was 34\%. Clinical cases were recorded for 39\% of the seropositive cattle; case-fatality risk was 13\%. There was no evidence of benefit on infection spread of {CBPP}-control measures used locally by farmers (isolation or antibiotic treatments of sick animals). This might be related to a lack of power in the statistical analyses or to a quality problem for the medications used (and more generally, for health-care delivery in the Boji district).},
	number = {1},
	urldate = {2013-10-03},
	journal = {Preventive Veterinary Medicine},
	author = {Lesnoff, Matthieu and Laval, G{\'e}raud and Bonnet, Pascal and Abdicho, Sintayehu and Workalemahu, Asseguid and Kifle, Daniel and Peyraud, Armelle and Lancelot, Renaud and Thiaucourt, Fran{\c{c}}ois},
	month = jun,
	year = {2004},
	keywords = {Clinical signs, Contagious bovine pleuropneumonia, Ethiopia, Herd monitoring, Within-herd incidence},
	pages = {27--40}
}


@article{kindsvater_male_2012,
	title = {Male diet, female experience, and female size influence maternal investment in swordtails},
	volume = {24},
	issn = {1045-2249, 1465-7279},
	url = {http://beheco.oxfordjournals.org/content/early/2012/12/16/beheco.ars213},
	doi = {10.1093/beheco/ars213},
	abstract = {Correlations between male phenotype and female investment in offspring size or number may result from either male influences or female responses to male phenotype. Theory has predicted that females may invest differentially in response to mate attractiveness either to take advantage of her partner’s attractiveness or to compensate for it. The outcome can depend on the female’s expected future fitness. Empirical evidence also suggests that females adjust offspring size or fecundity according to the genetic or phenotypic traits of the male. Though the role of future fitness in empirical results is often difficult to assess, female experience has been shown to influence reproductive investment. This result could indicate that experience informs the female’s assessment of current and future mating opportunities (i.e., expected future fitness). Here, we tested whether female experience and mate attractiveness affected offspring size and number in sheepshead swordtails. We manipulated mate attractiveness by feeding males different diets, as females have been shown to prefer well-fed males. We found that females exposed to well-fed males, and then mated to poorly fed males, produced the largest offspring. Clutch size increased more rapidly with female size for females with mates on a poor diet, independent of female experience. Our results suggest that swordtail females elevate their investment in offspring size and number when mated to poorly fed males. These results demonstrate that maternal investment can be influenced by female social environment and experience, with underappreciated consequences for offspring fitness.},
	language = {en},
	number = {3},
	urldate = {2013-10-17},
	journal = {Behavioral Ecology},
	author = {Kindsvater, Holly K. and Simpson, Suzanne E. and Rosenthal, Gil G. and Alonzo, Suzanne H.},
	month = may,
	year = {2013},
	keywords = {condition dependence, differential allocation, maternal effect, reproductive compensation, Xiphophorus},
	pages = {691--697}
}

@InProceedings{Phelps1982,
  author = 	 {K. Phelps},
  title = 	 {Use of the complementary log-log function to describe dose-response relationships in insecticide evaluation field trials},
  booktitle =	 {GLIM.82:  Proceedings of the International Conference on Generalized Linear Models},
  year =	 {1982},
  editor =	 {R. Gilchrist},
  number =	 {14},
  series =	 {Lecture Notes in Statistics},
  publisher =	 {Springer}
}


@article{firth_bias_1993,
	title = {Bias reduction of maximum likelihood estimates},
	volume = {80},
	issn = {0006-3444, 1464-3510},
	url = {http://biomet.oxfordjournals.org/content/80/1/27},
	doi = {10.1093/biomet/80.1.27},
	abstract = {{SUMMARY} It is shown how, in regular parametric problems, the first-order term is removed from the asymptotic bias of maximum likelihood estimates by a suitable modification of the score function. In exponential families with canonical parameterization the effect is to penalize the likelihood by the Jeffreys invariant prior. In binomial logistic models, Poisson log linear models and certain other generalized linear models, the Jeffreys prior penalty function can be imposed in standard regression software using a scheme of iterative adjustments to the data.},
	language = {en},
	number = {1},
	urldate = {2013-10-23},
	journal = {Biometrika},
	author = {Firth, David},
	month = mar,
	year = {1993},
	keywords = {Asymptotic bias, Biased estimating equations, Exponential family, Generalized linear model, Jeffreys prior, Logistic regression, Modified score, Penalized likelihood, Shrinkage},
	pages = {27--38}
}

@article{bliss_calculation_1935,
	title = {The Calculation of the Dosage-Mortality Curve},
	volume = {22},
	issn = {1744-7348},
	url = {http://onlinelibrary.wiley.com/doi/10.1111/j.1744-7348.1935.tb07713.x/abstract},
	doi = {10.1111/j.1744-7348.1935.tb07713.x},
	abstract = {The sigmoid dosage-mortality curve, secured so commonly in toxicity tests upon multicellular organisms, is interpreted as a cumulative normal frequency distribution of the variation among the individuals of a population in their susceptibility to a toxic agent, which susceptibility is inversely proportional to the logarithm of the dose applied. In support of this interpretation is the fact that when dosage is inferred from the observed mortality on the assumption that susceptibility is distributed normally, such inferred dosages, in terms of units called probits, give straight lines when plotted against the logarithm of their corresponding observed dosages. It is shown that this use of the logarithm of the dosage can be interpreted in terms either of the Weber-Fechner law or of the amount of poison fixed by the tissues of the organism. How this transformation to a straight regression line facilitates the precise estimation of the dosage-mortality relationship and its accuracy is considered in detail. Statistical methods are described for taking account of tests which result in 0 or 100 per cent, kill, for giving each determination a weight proportional to its reliability, for computing the position and slope of the transformed dosage-mortality curve, for measuring the goodness of fit of the regression line to the observations by the {$\chi^2$} test, and for calculating the error in position and in slope and their combined effect at any log. dosage. The terminology and procedures are consistent with those used by R. A. Fisher, who has contributed an appendix on the case of zero survivors. Except for a table of common logarithms, all the tables required to utilise the methods described are given either in the present paper or in Fisher's book. A numerical example selected from Strand's experiments upon Tribolium confusum with carbon disulphide has been worked out in detail.},
	language = {en},
	number = {1},
	urldate = {2013-10-27},
	journal = {Annals of Applied Biology},
	author = {Bliss, C. I.},
	year = {1935},
	pages = {134--167}
}

@article{schielzeth_conclusions_2009,
  author     = {Schielzeth, Holger and Forstmeier, Wolfgang},
  title      = {Conclusions beyond support: overconfident estimates in mixed models},
  shorttitle = {Conclusions beyond support},
  journal    = {Behavioral Ecology},
  year       = 2009,
  month      = mar,
  volume     = 20,
  number     = 2,
  pages      = {416--420},
  issn       = {1045-2249, 1465-7279},
  doi        = {10.1093/beheco/arn145},
  url        = {http://beheco.oxfordjournals.org/content/20/2/416},
  urldate    = {2012-07-27},
  language   = {en},
  keywords   = {experimental design, maternal effects, mixed-effect models, random regression, repeated measures, type I error},
  abstract   = {Mixed-effect models are frequently used to control for the nonindependence of data points, for example, when repeated measures from the same individuals are available. The aim of these models is often to estimate fixed effects and to test their significance. This is usually done by including random intercepts, that is, intercepts that are allowed to vary between individuals. The widespread belief is that this controls for all types of pseudoreplication within individuals. Here we show that this is not the case, if the aim is to estimate effects that vary within individuals and individuals differ in their response to these effects. In these cases, random intercept models give overconfident estimates leading to conclusions that are not supported by the data. By allowing individuals to differ in the slopes of their responses, it is possible to account for the nonindependence of data points that pseudoreplicate slope information. Such random slope models give appropriate standard errors and are easily implemented in standard statistical software. Because random slope models are not always used where they are essential, we suspect that many published findings have too narrow confidence intervals and a substantially inflated type I error rate. Besides reducing type I errors, random slope models have the potential to reduce residual variance by accounting for between-individual variation in slopes, which makes it easier to detect treatment effects that are applied between individuals, hence reducing type {II} errors as well.}
}


@article{barr_random_2013,
	title = {Random effects structure for confirmatory hypothesis testing: Keep it maximal},
	volume = {68},
	issn = {0749-596X},
	shorttitle = {Random effects structure for confirmatory hypothesis testing},
	url = {http://www.sciencedirect.com/science/article/pii/S0749596X12001180},
	doi = {10.1016/j.jml.2012.11.001},
	abstract = {Linear mixed-effects models ({LMEMs}) have become increasingly prominent in psycholinguistics and related areas. However, many researchers do not seem to appreciate how random effects structures affect the generalizability of an analysis. Here, we argue that researchers using {LMEMs} for confirmatory hypothesis testing should minimally adhere to the standards that have been in place for many decades. Through theoretical arguments and Monte Carlo simulation, we show that {LMEMs} generalize best when they include the maximal random effects structure justified by the design. The generalization performance of {LMEMs} including data-driven random effects structures strongly depends upon modeling criteria and sample size, yielding reasonable results on moderately-sized samples when conservative criteria are used, but with little or no power advantage over maximal models. Finally, random-intercepts-only {LMEMs} used on within-subjects and/or within-items data from populations where subjects and/or items vary in their sensitivity to experimental manipulations always generalize worse than separate F1 and F2 tests, and in many cases, even worse than F1 alone. Maximal {LMEMs} should be the ‘gold standard’ for confirmatory hypothesis testing in psycholinguistics and beyond.},
	number = {3},
	urldate = {2013-09-26},
	journal = {Journal of Memory and Language},
	author = {Barr, Dale J. and Levy, Roger and Scheepers, Christoph and Tily, Harry J.},
	month = apr,
	year = {2013},
	keywords = {Generalization, Linear mixed-effects models, Monte Carlo simulation, statistics},
	pages = {255--278}
}


@book{agresti_categorical_2002,
	address = {Hoboken, {NJ}},
	edition = {Second},
	title = {Categorical Data Analysis},
	isbn = {0-471-36093-7},
	publisher = {Wiley},
	author = {Agresti, Alan},
	year = {2002}
}


@article{bellio_restricted_2011,
  author   = {Bellio, Ruggero and Brazzale, Alessandra R.},
  title    = {Restricted likelihood inference for generalized linear mixed models},
  journal  = {Statistics and Computing},
  year     = 2011,
  month    = apr,
  volume   = 21,
  number   = 2,
  pages    = {173--183},
  issn     = {0960-3174, 1573-1375},
  doi      = {10.1007/s11222-009-9157-4},
  url      = {http://link.springer.com/article/10.1007/s11222-009-9157-4},
  urldate  = {2013-11-04},
  language = {en},
  keywords = {Artificial Intelligence (incl. Robotics), Logistic regression, Loglinear model, Mathematics, general, Maximum likelihood estimation, Modified profile likelihood, Numerical integration, Numeric Computing, Statistics and {Computing/Statistics} Programs, Statistics, general, Two-part model, Variance component},
  abstract = {We aim to promote the use of the modified profile likelihood function for estimating the variance parameters of a {GLMM} in analogy to the {REML} criterion for linear mixed models. Our approach is based on both quasi-Monte Carlo integration and numerical quadrature, obtaining in either case simulation-free inferential results. We will illustrate our idea by applying it to regression models with binary responses or count data and independent clusters, covering also the case of two-part models. Two real data examples and three simulation studies support the use of the proposed solution as a natural extension of {REML} for {GLMMs.} An R package implementing the methodology is available online.}
}

@BOOK{McCarthy2007,
  title = {{Bayesian} Methods for Ecology},
  publisher = {Cambridge University Press},
  year = {2007},
  author = {McCarthy, Michael A.},
  address = {Cambridge, England}
}


@book{kery_introduction_2010,
	address = {Boston},
	title = {Introduction to {WinBUGS} for ecologists: {Bayesian} approach to regression, {ANOVA}, mixed models and related analyses},
	isbn = {978-0-12-378605-0},
	abstract = {Bayesian statistics has exploded into biology and its sub-disciplines such as ecology over the past decade. The free software program {WinBUGS} and its open-source sister {OpenBugs} is currently the only flexible and general-purpose program available with which the average ecologist can conduct their own standard and non-standard Bayesian statistics. Introduction to {WINBUGS} for Ecologists goes right to the heart of the matter by providing ecologists with a comprehensive, yet concise, guide to applying {WinBUGS} to the types of models that they use most often: linear ({LM}), generalized linear ({GLM}), linear mixed ({LMM}) and generalized linear mixed models ({GLMM}). Introduction to {WinBUGS} for Ecologists combines the use of simulated data sets "paired" analyses using {WinBUGS} (in a Bayesian framework for analysis) and in R (in a frequentist mode of inference) and uses a very detailed step-by-step tutorial presentation style that really lets the reader repeat every step of the application of a given mode in their own research. - Introduction to the essential theories of key models used by ecologists - Complete juxtaposition of classical analyses in R and Bayesian Analysis of the same models in {WinBUGS} - Provides every detail of R and {WinBUGS} code required to conduct all analyses - Written with ecological language and ecological examples - Companion Web Appendix that contains all code contained in the book, additional material (including more code and solutions to exercises) - Tutorial approach shows ecologists how to implement Bayesian analysis in practical problems that they face.},
	language = {English},
	publisher = {Elsevier},
	author = {K{\'e}ry, Marc},
	year = {2010}
}

 @Article{solymos_dclone_2010,
    title = {dclone: Data Cloning in {R}},
    author = {S{\'o}lymos, P{\'e}ter},
    journal = {The R Journal},
    year = {2010},
    volume = {2},
    number = {2},
    pages = {29--37},
    url = {http://journal.r-project.org/},
  }

@Manual{Hojsgaard_pbkrtest_2013,
    title = {pbkrtest: Parametric bootstrap and {Kenward Roger} based methods for mixed model comparison},
    author = {Halekoh, Ulrich and H{\o}jsgaard, S{\o}ren},
    year = {2013},
    note = {R package version 0.3-7},
    url = {http://CRAN.R-project.org/package=pbkrtest},
  }

 @Article{Scheipl_size_2008,
    title = {Size and power of tests for a zero random effect variance or polynomial regression in additive and linear mixed models},
    author = {Scheipl, Fabian and Greven, Sonja and K{\"u}chenhoff, Helmut},
    year = {2008},
    journal = {Computational Statistics \& Data Analysis},
    volume = {52},
    number = {7},
    pages = {3283--3299},
  }

@BOOK{BurnhamAnderson2002,
  title = {Model Selection and Multimodel Inference: A Practical Information-Theoretic Approach},
  publisher = {Springer},
  year = {2002},
  author = {Burnham, K. P. and Anderson, D. R.},
  address = {New York},
  edition = {Second}
}

@article{ibrahim_fixed_2011,
	title = {Fixed and Random Effects Selection in Mixed Effects Models},
	volume = {67},
	issn = {0006-341X},
	doi = {10.1111/j.1541-0420.2010.01463.x},
	abstract = {We consider selecting both fixed and random effects in a general class of mixed effects models using maximum penalized likelihood ({MPL}) estimation along with the smoothly clipped absolute deviation ({SCAD}) and adaptive least absolute shrinkage and selection operator ({ALASSO}) penalty functions. The {MPL} estimates are shown to possess consistency and sparsity properties and asymptotic normality. A model selection criterion, called the {IC(Q)} statistic, is proposed for selecting the penalty parameters (Ibrahim, Zhu, and Tang, 2008, Journal of the American Statistical Association 103, 1648-1658). The variable selection procedure based on {IC(Q)} is shown to consistently select important fixed and random effects. The methodology is very general and can be applied to numerous situations involving random effects, including generalized linear mixed models. Simulation studies and a real data set from a Yale infant growth study are used to illustrate the proposed methodology.},
	language = {English},
	number = {2},
	journal = {Biometrics},
	author = {Ibrahim, Joseph G. and Zhu, Hongtu and Garcia, Ramon I. and Guo, Ruixin},
	month = jun,
	year = {2011},
	note = {{WOS:000292504000017}},
	keywords = {alasso, children born, Cholesky decomposition, covariance matrices, {EM} algorithm, {IC(Q)} criterion, incomplete data, longitudinal data, maltreatment, missing-data, Mixed effects selection, nonconcave penalized likelihood, oracle properties, penalized likelihood, regression-models, scad, Variable selection},
	pages = {495--503}
}

@article{yu_conditional_2012,
  author   = {Yu, Dalei and Yau, Kelvin K. W.},
  title    = {Conditional Akaike information criterion for generalized linear mixed models},
  journal  = {Computational Statistics \& Data Analysis},
  year     = 2012,
  month    = mar,
  volume   = 56,
  number   = 3,
  pages    = {629--644},
  issn     = {0167-9473},
  doi      = {10.1016/j.csda.2011.09.012},
  language = {English},
  note     = {{WOS:000298122600015}},
  keywords = {Conditional Akaike information, counts, generalized linear mixed model, Model   identification, Poisson time series, reml estimation, selection, time-series, Variance component},
  abstract = {In this study, a model identification instrument to determine the variance component structure for generalized linear mixed models ({GLMMS)} is developed based on the conditional Akaike information ({CAI).} In particular, an asymptotically unbiased estimator of the {CAI} (denoted as {CAICC)} is derived as the model selection criterion which takes the estimation uncertainty in the variance component parameters into consideration. The relationship between bias correction and generalized degree of freedom for {GLMMS} is also explored. Simulation results show that the estimator performs well. The proposed criterion demonstrates a high proportion of correct model identification for {GLMMS.} Two sets of real data (epilepsy seizure count data and polio incidence data) are used to illustrate the proposed model identification method. (C) 2011 Elsevier {B.V.} All rights reserved.}
}

@article{muller_model_2013,
	title = {Model Selection in Linear Mixed Models},
	volume = {28},
	issn = {0883-4237},
	url = {http://projecteuclid.org/euclid.ss/1369147909},
	doi = {10.1214/12-STS410},
	abstract = {Linear mixed effects models are highly flexible in handling a broad range of data types and are therefore widely used in applications. A key part in the analysis of data is model selection, which often aims to choose a parsimonious model with other desirable properties from a possibly very large set of candidate statistical models. Over the last 5–10 years the literature on model selection in linear mixed models has grown extremely rapidly. The problem is much more complicated than in linear regression because selection on the covariance structure is not straightforward due to computational issues and boundary problems arising from positive semidefinite constraints on covariance matrices. To obtain a better understanding of the available methods, their properties and the relationships between them, we review a large body of literature on linear mixed model selection. We arrange, implement, discuss and compare model selection methods based on four major approaches: information criteria such as {AIC} or {BIC}, shrinkage methods based on penalized loss functions such as {LASSO}, the Fence procedure and Bayesian techniques.},
	language = {en},
	number = {2},
	urldate = {2013-11-04},
	journal = {Statistical Science},
	author = {M{\"u}ller, Samuel and Scealy, J. L. and Welsh, A. H.},
	month = may,
	year = {2013},
	pages = {135--167}
}


@book{gelman+_BDA,
	title = {{Bayesian} Data Analysis},
	edition = {Third},
	publisher = {Chapman \& Hall/CRC},
	series = {Texts in Statistical Science},
	year = {2013},
	author = {Gelman, Andrew and Carlin, John B. and Stern, Hal S. and Dunson, David B. and Vehtari, Aki and Rubin, Donald B.}
}

@article{pasch_interspecific_2013,
  author    = {Pasch, Bret and Bolker, Benjamin M. and Phelps, Steven M.},
  title     = {Interspecific Dominance Via Vocal Interactions Mediates Altitudinal Zonation in Neotropical Singing Mice},
  journal   = {The American Naturalist},
  year      = 2013,
  month     = nov,
  volume    = 182,
  number    = 5,
  pages     = {E161--E173},
  issn      = {0003-0147},
  doi       = {10.1086/673263},
  url       = {http://www.jstor.org/stable/10.1086/673263},
  urldate   = {2013-10-10},
  copyright = {Copyright © 2013 The University of Chicago},
  abstract  = {Abstract Interspecific aggression between ecologically similar species may influence geographic limits by mediating competitive exclusion at the range edge. Advertisement signals that mediate competitive interactions within species may also provide social information that contributes to behavioral dominance and spatial segregation among species. We studied the mechanisms underlying altitudinal range limits in Neotropical singing mice (Scotinomys), a genus of muroid rodent in which males vocalize to repel rivals and attract mates. We first delineated replacement zones and described temperature regimes on three mountains in Costa Rica and Panama where Chiriquí singing mice (S. xerampelinus) abruptly replace Alston’s singing mice (S. teguina). Next, we conducted interspecific behavioral trials and reciprocal removal experiments to examine if interspecific aggression mediated species replacement. Finally, we performed reciprocal playback experiments to investigate whether response to song matched competitive interactions. Behavioral trials and removal experiments suggest that S. xerampelinus is behaviorally dominant and excludes S. teguina from higher, cooler altitudes. Playback experiments indicate that subordinate S. teguina is silenced and repelled by heterospecific song, whereas S. xerampelinus responded to heterospecifics with approach and song rates comparable to responses to conspecifics. Thus, interspecific communication reflects underlying dominance and suggests that acoustic signaling contributes to altitudinal zonation of ecologically similar congeners. Our findings implicate the use of social information in structuring spatial distributions of animal communities across landscapes and provide insight into how large-scale patterns are generated by individual interactions.}
}


@article{chung_nondegenerate_2013,
	title = {A Nondegenerate Penalized Likelihood Estimator for Variance Parameters in Multilevel Models},
	year = {2013},
	volume = {78},
	number = {4},
	issn = {0033-3123, 1860-0980},
	url = {http://link.springer.com/article/10.1007/s11336-013-9328-2},
	doi = {10.1007/s11336-013-9328-2},
	abstract = {Group-level variance estimates of zero often arise when fitting multilevel or hierarchical linear models, especially when the number of groups is small. For situations where zero variances are implausible a priori, we propose a maximum penalized likelihood approach to avoid such boundary estimates. This approach is equivalent to estimating variance parameters by their posterior mode, given a weakly informative prior distribution. By choosing the penalty from the log-gamma family with shape parameter greater than 1, we ensure that the estimated variance will be positive. We suggest a default log-gamma(2,λ) penalty with λ→0, which ensures that the maximum penalized likelihood estimate is approximately one standard error from zero when the maximum likelihood estimate is zero, thus remaining consistent with the data while being nondegenerate. We also show that the maximum penalized likelihood estimator with this default penalty is a good approximation to the posterior median obtained under a noninformative prior. Our default method provides better estimates of model parameters and standard errors than the maximum likelihood or the restricted maximum likelihood estimators. The log-gamma family can also be used to convey substantive prior information. In either case—pure penalization or prior information—our recommended procedure gives nondegenerate estimates and in the limit coincides with maximum likelihood as the number of groups increases.},
	language = {en},
	urldate = {2013-07-30},
	journal = {Psychometrika},
	author = {Chung, Yeojin and Rabe-Hesketh, Sophia and Dorie, Vincent and Gelman, Andrew and Liu, Jingchen},
	keywords = {Assessment, Testing and Evaluation, Bayes modal estimation, hierarchical linear model, Mixed Model, Multilevel model, penalized likelihood, Psychometrics, Statistical Theory and Methods, Statistics for Social Science, Behavorial Science, Education, Public Policy, and Law, variance estimation, weakly informative prior},
	pages = {685--709}
}


@article{ives_generalized_2011,
	title = {Generalized linear mixed models for phylogenetic analyses of community structure},
	volume = {81},
	issn = {0012-9615},
	url = {http://www.esajournals.org/doi/abs/10.1890/10-1264.1},
	doi = {10.1890/10-1264.1},
	abstract = {There is growing appreciation that ecological communities are phylogenetically structured, with phylogenetically closely related species either more or less likely to co-occur at the same site. Here, we present phylogenetic generalized linear mixed models ({PGLMMs}) that can statistically test a wide variety of phylogenetic patterns in community structure. In contrast to most current statistical approaches that rely on community metrics and randomization tests, {PGLMMs} are model-based statistics that fit observed presence/absence data to underlying hypotheses about the distributions of species among communities. We built four {PGLMMs} to address (1) phylogenetic patterns in community composition, (2) phylogenetic variation in species sensitivities to environmental gradients among communities, (3) phylogenetic repulsion in which closely related species are less likely to co-occur, and (4) trait-based variation in species sensitivities to environmental gradients. We also built a fifth {PGLMM} to test a key underlying assumption of phylogenetic community structure: that phylogenetic information serves as a surrogate for trait information about species; this model tests whether the introduction of trait information can explain all variation in species occurrences among communities, leaving no phylogenetic residual variation. We assessed the performance of these {PGLMMs} using community simulation models and show that {PGLMMs} have equal or greater statistical power than alternative approaches currently in the literature. Finally, we illustrate the {PGLMM} advantage of fitting a model to data by showing how variation in species occurrences among communities can be partitioned into phylogenetic and site-specific components, and how fitted models can be used to predict the co-occurrence of phylogenetically related species.},
	number = {3},
	urldate = {2014-04-25},
	journal = {Ecological Monographs},
	author = {Ives, Anthony R. and Helmus, Matthew R.},
	month = jan,
	year = {2011},
	keywords = {ecophylogenetics, environmental gradient, generalized linear models, {GLMM}, null model, phylogenetic community structure, phylogenetic diversity, Phylogenetic signal, trait-based community assembly, trait variation},
	pages = {511--525}
}


@article{stroup_rethinking_2014,
	title = {Rethinking the Analysis of Non-Normal Data in Plant and Soil Science},
	volume = {107},
	number = {2},
	url = {https://dl.sciencesocieties.org/publications/aj/articles/0/0/agronj2013.0342},
	doi = {10.2134/agronj2013.0342},
	urldate = {2014-03-02},
	journal = {Agronomy Journal},
	author = {Stroup, Walter W.},
	year = {2015},
	pages = {811--827}
}


@article{field_bootstrapping_2007,
  author   = {Field, C. A. and Welsh, A. H.},
  title    = {Bootstrapping clustered data},
  journal  = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
  year     = {2007},
  month    = jun,
  volume   = {69},
  number   = {3},
  pages    = {369--390},
  doi      = {10.1111/j.1467-9868.2007.00593.x},
  keywords = {Between and Within Sums of Squares, Bootstrap, Clusters, Hierarchical Data, One-Way Arrays},
  abstract = {Summary. Various bootstraps have been proposed for bootstrapping clustered data from one-way arrays. The simulation results in the literature suggest that some of these methods work quite well in practice; the theoretical results are limited and more mixed in their conclusions. For example, {McCullagh} reached negative conclusions about the use of non-parametric bootstraps for one-way arrays. The purpose of this paper is to extend our understanding of the issues by discussing the effect of different ways of modelling clustered data, the criteria for successful bootstraps used in the literature and extending the theory from functions of the sample mean to include functions of the between and within sums of squares and non-parametric bootstraps to include model-based bootstraps. We determine that the consistency of variance estimates for a bootstrap method depends on the choice of model with the residual bootstrap giving consistency under the transformation model whereas the cluster bootstrap gives consistent estimates under both the transformation and the random-effect model. In addition we note that the criteria based on the distribution of the bootstrap observations are not really useful in assessing consistency.}
}

@article{gelman_lets_2002,
	title = {Let's Practice What We Preach: Turning Tables into Graphs},
	volume = {56},
	issn = {0003-1305},
	shorttitle = {Let's Practice What We Preach},
	url = {http://www.jstor.org/stable/3087382},
	abstract = {Statisticians recommend graphical displays but often use tables to present their own research results. Could graphs do better? We study the question by going through the tables in a recent issue of the Journal of the American Statistical Association. We show how it is possible to improve the presentations using graphs that actually take up less space than the original tables. We find a particularly effective tool to be multiple repeated line plots, with comparisons of interest connected by lines and separate comparisons isolated on different plots.},
	number = {2},
	urldate = {2011-06-29},
	journal = {The American Statistician},
	author = {Gelman, Andrew and Pasarica, Cristian and Dodhia, Rahul},
	month = may,
	year = {2002},
	pages = {121--130}
}


@book{millar_maximum_2011,
	title = {Maximum Likelihood Estimation and Inference: With Examples in {R}, {SAS} and {ADMB}},
	isbn = {9781119977711},
	shorttitle = {Maximum Likelihood Estimation and Inference},
	abstract = {This book takes a fresh look at the popular and well-established method of maximum likelihood for statistical estimation and inference. It begins with an intuitive introduction to the concepts and background of likelihood, and moves through to the latest developments in maximum likelihood methodology, including general latent variable models and new material for the practical implementation of integrated likelihood using the free {ADMB} software. Fundamental issues of statistical inference are also examined, with a presentation of some of the philosophical debates underlying the choice of statistical paradigm. Key features: Provides an accessible introduction to pragmatic maximum likelihood modelling. Covers more advanced topics, including general forms of latent variable models (including non-linear and non-normal mixed-effects and state-space models) and the use of maximum likelihood variants, such as estimating equations, conditional likelihood, restricted likelihood and integrated likelihood. Adopts a practical approach, with a focus on providing the relevant tools required by researchers and practitioners who collect and analyze real data. Presents numerous examples and case studies across a wide range of applications including medicine, biology and ecology. Features applications from a range of disciplines, with implementation in R, {SAS} and/or {ADMB}. Provides all program code and software extensions on a supporting website. Confines supporting theory to the final chapters to maintain a readable and pragmatic focus of the preceding chapters. This book is not just an accessible and practical text about maximum likelihood, it is a comprehensive guide to modern maximum likelihood estimation and inference. It will be of interest to readers of all levels, from novice to expert. It will be of great benefit to researchers, and to students of statistics from senior undergraduate to graduate level. 
For use as a course text, exercises are provided at the end of each chapter.},
	language = {en},
	publisher = {John Wiley \& Sons},
	author = {Millar, Russell B.},
	month = jul,
	year = {2011},
	keywords = {Computers / Mathematical \& Statistical Software, Mathematics / Probability \& Statistics / General}
}


@misc{ohara_focus_2007,
	title = {Focus on {DIC}},
	howpublished = {Blog post, Deep Thoughts and Silliness},
	url = {http://deepthoughtsandsilliness.blogspot.com/2007/12/focus-on-dic.html},
	urldate = {2012-03-05},
	journal = {Deep Thoughts and Silliness},
	author = {{O'Hara}, Bob},
	month = dec,
	year = {2007},
	keywords = {{DIC}, {OpenBUGS}, statistics}
}


@article{ohara_how_2009,
  author  = {{O'Hara}, Robert B.},
  title   = {How to Make Models Add Up: A Primer on {GLMMs}},
  journal = {Annales Zoologici Fennici},
  year    = {2009},
  month   = apr,
  volume  = {46},
  number  = {2},
  pages   = {124--137},
  issn    = {0003-{455X}},
  doi     = {10.5735/086.046.0205},
  url     = {http://www.bioone.org/doi/abs/10.5735/086.046.0205},
  urldate = {2014-02-17}
}


@article{peng_model_2012,
	title = {Model selection in linear mixed effect models},
	volume = {109},
	issn = {0047-259X},
	url = {http://linkinghub.elsevier.com/retrieve/pii/S0047259X12000395},
	doi = {10.1016/j.jmva.2012.02.005},
	urldate = {2014-03-09},
	journal = {Journal of Multivariate Analysis},
	author = {Peng, Heng and Lu, Ying},
	month = aug,
	year = {2012},
	pages = {109--129}
}

@book{QuinnKeough2002,
  author    = {Quinn, Gerry P. and Keough, Michael J.},
  title     = {Experimental Design and Data Analysis for Biologists},
  publisher = {Cambridge University Press},
  address   = {Cambridge, England},
  year      = {2002},
  isbn      = {0521009766}
}


@article{shang_bootstrap_2008,
  author   = {Shang, Junfeng and Cavanaugh, Joseph E.},
  title    = {Bootstrap variants of the {Akaike} information criterion for mixed model selection},
  journal  = {Computational Statistics \& Data Analysis},
  year     = {2008},
  month    = jan,
  volume   = {52},
  number   = {4},
  pages    = {2004--2021},
  issn     = {0167-9473},
  doi      = {10.1016/j.csda.2007.06.019},
  url      = {http://www.sciencedirect.com/science/article/pii/S0167947307002587},
  urldate  = {2014-03-09},
  keywords = {{AIC}, {Kullback–Leibler} information, Model selection criteria},
  abstract = {Two bootstrap-corrected variants of the Akaike information criterion are proposed for the purpose of small-sample mixed model selection. These two variants are asymptotically equivalent, and provide asymptotically unbiased estimators of the expected {Kullback–Leibler} discrepancy between the true model and a fitted candidate model. The performance of the criteria is investigated in a simulation study where the random effects and the errors for the true model are generated from a Gaussian distribution. The parametric bootstrap is employed. The simulation results suggest that both criteria provide effective tools for choosing a mixed model with an appropriate mean and covariance structure. A theoretical asymptotic justification for the variants is presented in the Appendix.}
}

@article{ives_statistics_2006,
	title = {Statistics for correlated data: phylogenies, space, and time},
	volume = {16},
	shorttitle = {Statistics for correlated data},
	url = {http://www.esajournals.org/doi/pdf/10.1890/04-0702},
	doi = {10.1890/04-0702},
	number = {1},
	urldate = {2013-06-28},
	journal = {Ecological Applications},
	author = {Ives, Anthony R. and Zhu, Jun},
	year = {2006},
	pages = {20--32}
}

@article{rousset_testing_2014,
	title = {Testing environmental and genetic effects in the presence of spatial autocorrelation},
	volume = {37},
	copyright = {© 2014 The Authors},
	issn = {1600-0587},
	url = {http://onlinelibrary.wiley.com/doi/10.1111/ecog.00566/abstract},
	doi = {10.1111/ecog.00566},
	abstract = {Spatial autocorrelation is a well-recognized concern for observational data in general, and more specifically for spatial data in ecology. Generalized linear mixed models ({GLMMs}) with spatially autocorrelated random effects are a potential general framework for handling these spatial correlations. However, as the result of statistical and practical issues, such {GLMMs} have been fitted through the undocumented use of procedures based on penalized quasi-likelihood approximations ({PQL}), and under restrictive models of spatial correlation. Alternatively, they are often neglected in favor of simpler but more questionable approaches. In this work we aim to provide practical and validated means of inference under spatial {GLMMs}, that overcome these limitations. For this purpose, a new software is developed to fit spatial {GLMMs}. We use it to assess the performance of likelihood ratio tests for fixed effects under spatial autocorrelation, based on Laplace or {PQL} approximations of the likelihood. Expectedly, the Laplace approximation performs generally slightly better, although a variant of {PQL} was better in the binary case. We show that a previous implementation of {PQL} methods in the R language, {glmmPQL}, is not appropriate for such applications. Finally, we illustrate the efficiency of a bootstrap procedure for correcting the small sample bias of the tests, which applies also to non-spatial models.},
	language = {en},
	urldate = {2014-02-25},
	journal = {Ecography},
	author = {Rousset, François and Ferdy, Jean-Baptiste},
	year = {2014},
	pages = {781--790}
}

@article{freeman_inverse_2006,
  author     = {Freeman, Jade and Modarres, Reza},
  title      = {Inverse {Box-Cox}: The power-normal distribution},
  shorttitle = {Inverse Box–Cox},
  journal    = {Statistics \& Probability Letters},
  year       = {2006},
  month      = apr,
  volume     = {76},
  number     = {8},
  pages      = {764--772},
  issn       = {0167-7152},
  doi        = {10.1016/j.spl.2005.10.036},
  url        = {http://www.sciencedirect.com/science/article/pii/S016771520500386X},
  urldate    = {2014-11-08},
  keywords   = {Box–Cox transformation, Lognormal, Power normal, Quantiles, Skewness, Uncertainty analysis},
  abstract   = {Box–Cox transformation system produces the power normal ({PN}) family, whose members include normal and lognormal distributions. We study the moments of {PN} and obtain expressions for its mean and variance. The quantile functions and a quantile measure of skewness are discussed to show that the {PN} family is ordered with respect to the transformation parameter. Chebyshev–Hermite polynomials are used to show that the correlation coefficient is smaller in the {PN} scale than the original scale. We use the Fréchet bounds to obtain expressions for the lower and upper bounds of the correlation coefficient. A numerical routine is used to compute the bounds. The transformation parameter of the {PN} family is used to investigate the effects of model uncertainty on the upper quantile estimates.}
}


@article{ebbes_regressor_2004,
  author  = {Ebbes, P. and Böckenholt, U. and Wedel, M.},
  title   = {Regressor and random-effects dependencies in multilevel models},
  journal = {Statistica Neerlandica},
  year    = {2004},
  volume  = {58},
  number  = {2},
  pages   = {161--178}
}


@book{Lindsey1997,
  title     = {Applying Generalized Linear Models},
  author    = {Lindsey, James K.},
  year      = {1997},
  publisher = {Springer}
}

@book{Bolker2008,
  title     = {Ecological Models and Data in R},
  author    = {Bolker, Benjamin M.},
  year      = {2008},
  publisher = {Princeton University Press},
  address   = {Princeton, NJ}
}


@incollection{feng_small_2004,
  author    = {Feng, Ziding and Braun, Thomas and McCulloch, Charles},
  title     = {Small Sample Inference for Clustered Data},
  booktitle = {Proceedings of the Second {Seattle} Symposium in Biostatistics},
  editor    = {D. Y. Lin and P. J. Heagerty},
  series    = {Lecture Notes in Statistics},
  volume    = {179},
  pages     = {71--87},
  publisher = {Springer},
  address   = {New York, NY},
  year      = {2004},
  url       = {http://www.springerlink.com/content/h2g33m7127790343/}
}

@article{bell_small_2010,
  author  = {Bell, Melanie L. and Grunwald, Gary K.},
  title   = {Small sample estimation properties of longitudinal count models},
  journal = {Journal of Statistical Computation and Simulation},
  year    = {2010},
  volume  = {81},
  number  = {9},
  pages   = {1067--1079},
  doi     = {10.1080/00949651003674144}
}


@article{hughes_model_2003,
  author  = {Hughes, A. and King, M.},
  title   = {Model selection using {AIC} in the presence of one-sided information},
  journal = {Journal of Statistical Planning and Inference},
  year    = {2003},
  volume  = {115},
  pages   = {397--411}
}


@article{xu_measuring_2003,
  author  = {Xu, R.},
  title   = {Measuring explained variation in linear mixed effects models},
  journal = {Statistics in Medicine},
  year    = {2003},
  volume  = {22},
  pages   = {3527--3541},
  doi     = {10.1002/sim.1572}
}


@book{lawson_disease_1999,
  title     = {Disease Mapping and Risk Assessment for Public Health},
  editor    = {Lawson, A. and Biggeri, A. and Bohning, D. and LeSaffre, E. and Viel, J. F. and Bertollini, R.},
  year      = {1999},
  publisher = {Wiley},
  address   = {New York}
}


@book{maindonald_data_2010,
  title     = {Data Analysis and Graphics Using R, An Example-Based Approach},
  author    = {Maindonald, J. and Braun, J.},
  edition   = {3},
  year      = {2010},
  publisher = {Cambridge University Press}
}


@book{rabehesketh_multilevel_2008,
  title     = {Multilevel and Longitudinal Modeling Using Stata},
  author    = {Rabe-Hesketh, Sophia and Skrondal, Anders},
  edition   = {2},
  year      = {2008},
  publisher = {Stata Press},
  url       = {http://www.stata-press.com/books/mlmus2.html}
}

@article{browne_variance_2005,
  author  = {Browne, W. J. and Subramanian, S. V. and Jones, K. and Goldstein, H.},
  title   = {Variance partitioning in multilevel logistic models that exhibit overdispersion},
  journal = {Journal of the Royal Statistical Society: Series A (Statistics in Society)},
  year    = {2005},
  volume  = {168},
  number  = {3},
  pages   = {599--613},
  doi     = {10.1111/j.1467-985X.2004.00365.x}
}

@article{breslow_extrapoisson_1984,
  author  = {Breslow, N. E.},
  title   = {Extra-{Poisson} variation in log-linear models},
  journal = {Journal of the Royal Statistical Society: Series C (Applied Statistics)},
  year    = {1984},
  volume  = {33},
  pages   = {38--44},
  url     = {http://www.jstor.org/stable/2347661}
}

@incollection{hinde_compound_1982,
  author    = {Hinde, John},
  title     = {Compound {Poisson} Regression Models},
  booktitle = {GLIM82: Proc. Int. Conf. on GLMs},
  editor    = {R. Gilchrist},
  pages     = {109--121},
  publisher = {Springer},
  year      = {1982}
}

@article{berger_integrated_1999,
  author  = {Berger, J. O. and Liseo, B. and Wolpert, R. L.},
  title   = {Integrated likelihood methods for eliminating nuisance parameters},
  journal = {Statistical Science},
  year    = {1999},
  volume  = {14},
  number  = {1},
  pages   = {1--22},
  url     = {http://projecteuclid.org/download/pdf_1/euclid.ss/1009211804}
}


@book{schabenberger_contemporary_2001,
  title     = {Contemporary Statistical Models for the Plant and Soil Sciences},
  author    = {Schabenberger, Oliver and Pierce, Francis J.},
  year      = {2001},
  publisher = {CRC Press},
  address   = {Boca Raton, FL},
  isbn      = {1584881119}
}

@article{clark2015should,
  author    = {Clark, Tom S. and Linzer, Drew A.},
  title     = {Should {I} use fixed or random effects?},
  journal   = {Political Science Research and Methods},
  year      = {2015},
  volume    = {3},
  number    = {2},
  pages     = {399--408},
  publisher = {Cambridge University Press}
}

@article{matuschek_balancing_2017,
	author = {Matuschek, Hannes and Kliegl, Reinhold and Vasishth, Shravan and Baayen, Harald and Bates, Douglas},
	title = {Balancing Type {I} Error and Power in Linear Mixed Models},
	journal = {Journal of Memory and Language},
	year = {2017},
	volume = {94},
	pages = {305--315},
	doi = {10.1016/j.jml.2017.01.001},
	abstract = {Linear mixed-effects models have increasingly replaced mixed-model analyses of variance for statistical inference in factorial psycholinguistic experiments. The advantages of LMMs over ANOVAs, however, come at a cost: Setting up an LMM is not as straightforward as running an ANOVA. One simple option, when numerically possible, is to fit the full variance-covariance structure of random effects (the maximal model; Barr et al., 2013), presumably to keep Type I error down to the nominal $\alpha$ in the presence of random effects. Although it is true that fitting a model with only random intercepts may lead to higher Type I error, fitting a maximal model also has a cost: it can lead to a significant loss of power. We demonstrate this with simulations and suggest that for typical psychological and psycholinguistic data, models with a random effect structure that is supported by the data have optimal Type I error and power properties.}
}

@article{bates_parsimonious_2015,
	title = {Parsimonious Mixed Models},
	url = {http://arxiv.org/abs/1506.04967},
	eprint = {1506.04967},
	archiveprefix = {arXiv},
	primaryclass = {stat.ME},
	abstract = {The analysis of experimental data with mixed-effects models requires decisions about the specification of the appropriate random-effects structure. Recently, Barr et al. (2013) recommended fitting 'maximal' models with all possible random effect components included. Estimation of maximal models, however, may not converge. We show that failure to converge typically is not due to a suboptimal estimation algorithm, but is a consequence of attempting to fit a model that is too complex to be properly supported by the data, irrespective of whether estimation is based on maximum likelihood or on Bayesian hierarchical modeling with uninformative or weakly informative priors. Importantly, even under convergence, overparameterization may lead to uninterpretable models. We provide diagnostic tools for detecting overparameterization and guiding model simplification. Finally, we clarify that the simulations on which Barr et al. base their recommendations are atypical for real data. A detailed example is provided of how subject-related attentional fluctuation across trials may further qualify statistical inferences about fixed effects, and of how such nonlinear effects can be accommodated within the mixed-effects modeling framework.},
	urldate = {2015-12-31},
	journal = {arXiv:1506.04967 [stat]},
	author = {Bates, Douglas and Kliegl, Reinhold and Vasishth, Shravan and Baayen, Harald},
	month = jun,
	year = {2015},
	note = {arXiv: 1506.04967},
	keywords = {Statistics - Methodology},
	file = {arXiv\:1506.04967 PDF:/Users/bolker/Library/Application Support/Firefox/Profiles/rxerw03y.default/zotero/storage/R5GBSNI8/Bates et al. - 2015 - Parsimonious Mixed Models.pdf:application/pdf;arXiv.org Snapshot:/Users/bolker/Library/Application Support/Firefox/Profiles/rxerw03y.default/zotero/storage/3PPCR27G/1506.html:text/html}
}

@article{belshe_tundra_2013,
	title = {Tundra ecosystems observed to be {CO$_2$} sources due to differential amplification of the carbon cycle},
	copyright = {© 2013 John Wiley \& Sons {Ltd/CNRS}},
	issn = {1461-0248},
	url = {http://onlinelibrary.wiley.com/doi/10.1111/ele.12164/abstract},
	doi = {10.1111/ele.12164},
	abstract = {Are tundra ecosystems currently a carbon source or sink? What is the future trajectory of tundra carbon fluxes in response to climate change? These questions are of global importance because of the vast quantities of organic carbon stored in permafrost soils. In this meta-analysis, we compile 40 years of {CO2} flux observations from 54 studies spanning 32 sites across northern high latitudes. Using time-series analysis, we investigated if seasonal or annual {CO2} fluxes have changed over time, and whether spatial differences in mean annual temperature could help explain temporal changes in {CO2} flux. Growing season net {CO2} uptake has definitely increased since the 1990s; the data also suggest (albeit less definitively) an increase in winter {CO2} emissions, especially in the last decade. In spite of the uncertainty in the winter trend, we estimate that tundra sites were annual {CO2} sources from the mid-1980s until the 2000s, and data from the last 7 years show that tundra continue to emit {CO2} annually. {CO2} emissions exceed {CO2} uptake across the range of temperatures that occur in the tundra biome. Taken together, these data suggest that despite increases in growing season uptake, tundra ecosystems are currently {CO2} sources on an annual basis.},
	language = {en},
	urldate = {2013-09-10},
	journal = {Ecology Letters},
	author = {Belshe, E. F. and Schuur, E. A. G. and Bolker, B. M.},
	year = {2013},
	volume = {16},
	number = {10},
	pages = {1307--1315},
	keywords = {Carbon flux, climate change, tundra}
}

@article{friendly_effect_2003,
	series = {Data Visualization},
	title = {Effect ordering for data displays},
	volume = {43},
	issn = {0167-9473},
	url = {http://www.sciencedirect.com/science/article/pii/S0167947302002906},
	doi = {10.1016/S0167-9473(02)00290-6},
	abstract = {This paper outlines a general framework for ordering information in visual displays (tables and graphs) according to the effects or trends which we desire to see. This idea, termed effect-ordered data displays, applies principally to the arrangement of unordered factors for quantitative data and frequency data, and to the arrangement of variables and observations in multivariate displays (star plots, parallel coordinate plots, and so forth). As examples of this principle, we present several techniques for ordering items, levels or variables "optimally", based on some desired criterion. All of these may be based on eigenvalue or singular-value decompositions. Along the way, we tell some stories about data display, illustrated by graphs—some surprisingly bad, and some surprisingly good—for showing patterns, trends, and anomalies in data. We hope to raise more questions than we can provide answers for.},
	number = {4},
	urldate = {2016-01-23},
	journal = {Computational Statistics \& Data Analysis},
	author = {Friendly, Michael and Kwan, Ernest},
	month = aug,
	year = {2003},
	pages = {509--539}
}

@article{wainer_visual_2001,
  author  = {Wainer, Howard},
  title   = {Visual Revelations},
  journal = {Chance},
  year    = {2001},
  month   = mar,
  volume  = {14},
  number  = {2},
  pages   = {43--46},
  issn    = {0933-2480},
  doi     = {10.1080/09332480.2001.10542269},
  url     = {http://dx.doi.org/10.1080/09332480.2001.10542269},
  urldate = {2016-01-23}
}

@mastersthesis{biswas2015,
  title  = {Performances of different estimation methods for generalized linear mixed models},
  author = {Biswas, Keya},
  year   = {2015},
  school = {McMaster University},
  url    = {https://macsphere.mcmaster.ca/bitstream/11375/17272/2/M.Sc_Thesis_final_Keya_Biswas.pdf}
}

@incollection{bolker_glmm_2014,
  title     = {Linear and Generalized Linear Mixed Models},
  author    = {Bolker, Benjamin M.},
  booktitle = {Ecological Statistics: Contemporary theory and application},
  editor    = {Fox, Gordon A. and Negrete-Yankelevich, Simoneta and Sosa, Vinicio J.},
  chapter   = {13},
  year      = {2015},
  publisher = {Oxford University Press},
  isbn      = {978-0-19-967255-4}
}


@article{Nelder1954,
  author  = {Nelder, J. A.},
  title   = {The interpretation of negative components of variance},
  journal = {Biometrika},
  year    = {1954},
  volume  = {41},
  pages   = {544--548}
}


@article{wang_dilemma_1992,
  author  = {Wang, C. S. and Yandell, B. S. and Rutledge, J. J.},
  title   = {The dilemma of negative analysis of variance estimators of intraclass correlation},
  journal = {Theoretical and Applied Genetics},
  year    = {1992},
  volume  = {85},
  pages   = {79--88}
}


@article{pryseley_estimating_2011,
  author  = {Pryseley, A. and Tchonlafi, C. and Verbeke, G. and Molenberghs, G.},
  title   = {Estimating negative variance components from {Gaussian} and non-{Gaussian} data: A mixed models approach},
  journal = {Computational Statistics \& Data Analysis},
  year    = {2011},
  volume  = {55},
  pages   = {1071--1085}
}


@article{nakagawa_repeatability_2010,
  author     = {Nakagawa, Shinichi and Schielzeth, Holger},
  title      = {Repeatability for {Gaussian} and non-{Gaussian} data: a practical guide for biologists},
  shorttitle = {Repeatability for {Gaussian} and non-{Gaussian} data},
  journal    = {Biological Reviews},
  year       = {2010},
  month      = nov,
  volume     = {85},
  number     = {4},
  pages      = {935--956},
  issn       = {1469-185X},
  doi        = {10.1111/j.1469-185X.2010.00141.x},
  url        = {http://onlinelibrary.wiley.com/doi/10.1111/j.1469-185X.2010.00141.x/abstract},
  urldate    = {2015-07-29},
  language   = {en},
  copyright  = {© 2010 The Authors. Biological Reviews © 2010 Cambridge Philosophical Society},
  keywords   = {analysis of variance (ANOVA), confidence intervals, credibility intervals, generalised linear mixed-effects model (GLMM), heritability, intra-class correlation coefficient (ICC), Markov chain Monte Carlo (MCMC), restricted maximum likelihood (REML), Statistical Significance},
  abstract   = {Repeatability (more precisely the common measure of repeatability, the intra-class correlation coefficient, ICC) is an important index for quantifying the accuracy of measurements and the constancy of phenotypes. It is the proportion of phenotypic variation that can be attributed to between-subject (or between-group) variation. As a consequence, the non-repeatable fraction of phenotypic variation is the sum of measurement error and phenotypic flexibility. There are several ways to estimate repeatability for Gaussian data, but there are no formal agreements on how repeatability should be calculated for non-Gaussian data (e.g. binary, proportion and count data). In addition to point estimates, appropriate uncertainty estimates (standard errors and confidence intervals) and statistical significance for repeatability estimates are required regardless of the types of data. We review the methods for calculating repeatability and the associated statistics for Gaussian and non-Gaussian data. For Gaussian data, we present three common approaches for estimating repeatability: correlation-based, analysis of variance (ANOVA)-based and linear mixed-effects model (LMM)-based methods, while for non-Gaussian data, we focus on generalised linear mixed-effects models (GLMM) that allow the estimation of repeatability on the original and on the underlying latent scale. We also address a number of methods for calculating standard errors, confidence intervals and statistical significance; the most accurate and recommended methods are parametric bootstrapping, randomisation tests and Bayesian approaches. We advocate the use of LMM- and GLMM-based approaches mainly because of the ease with which confounding variables can be controlled for. Furthermore, we compare two types of repeatability (ordinary repeatability and extrapolated repeatability) in relation to narrow-sense heritability. 
This review serves as a collection of guidelines and recommendations for biologists to calculate repeatability and heritability from both Gaussian and non-Gaussian data.}
}


@article{johnson_extension_2014,
	title = {Extension of {Nakagawa} \& {Schielzeth}'s {$R^2_{\mathrm{GLMM}}$} to random slopes models},
	volume = {5},
	copyright = {© 2014 The Author. Methods in Ecology and Evolution published by John Wiley \& Sons Ltd on behalf of British Ecological society., This is an open access article under the terms of the Creative Commons Attribution License, which permits use, distribution and reproduction in any medium, provided the original work is properly cited.},
	issn = {2041-210X},
	url = {http://onlinelibrary.wiley.com/doi/10.1111/2041-210X.12225/abstract},
	doi = {10.1111/2041-210X.12225},
	abstract = {* Nakagawa \& Schielzeth extended the widely used goodness-of-fit statistic R2 to apply to generalized linear mixed models (GLMMs). However, their R2GLMM method is restricted to models with the simplest random effects structure, known as random intercepts models. It is not applicable to another common random effects structure, random slopes models.


* I show that R2GLMM can be extended to random slopes models using a simple formula that is straightforward to implement in statistical software. This extension substantially widens the potential application of R2GLMM.},
	language = {en},
	number = {9},
	urldate = {2015-07-29},
	journal = {Methods in Ecology and Evolution},
	author = {Johnson, Paul C. D.},
	month = sep,
	year = {2014},
	keywords = {coefficient of determination, generalized linear mixed model, random regression, random slopes model},
	pages = {944--946}
}


@article{nakagawa_general_2013,
	title = {A general and simple method for obtaining {$R^2$} from generalized linear mixed-effects models},
	volume = {4},
	issn = {2041-210X},
	url = {http://doi.wiley.com/10.1111/j.2041-210x.2012.00261.x},
	doi = {10.1111/j.2041-210x.2012.00261.x},
	number = {2},
	urldate = {2013-07-14},
	journal = {Methods in Ecology and Evolution},
	author = {Nakagawa, Shinichi and Schielzeth, Holger},
	month = feb,
	year = {2013},
	pages = {133--142}
}


@article{gelman_bayesian_2006,
	title = {Bayesian measures of explained variance and pooling in multilevel (hierarchical) models},
	volume = {48},
	url = {http://amstat.tandfonline.com/doi/abs/10.1198/004017005000000517},
	doi = {10.1198/004017005000000517},
	number = {2},
	urldate = {2016-06-21},
	journal = {Technometrics},
	author = {Gelman, Andrew and Pardoe, Iain},
	year = {2006},
	pages = {241--251}
}

@article{bolker_strategies_2013,
	title = {Strategies for fitting nonlinear ecological models in {R}, {AD} {Model} {Builder}, and {BUGS}},
	volume = {4},
	issn = {2041-210X},
	url = {http://doi.wiley.com/10.1111/2041-210X.12044},
	doi = {10.1111/2041-210X.12044},
	number = {6},
	urldate = {2013-06-11},
	journal = {Methods in Ecology and Evolution},
	author = {Bolker, Benjamin M. and Gardner, Beth and Maunder, Mark and Berg, Casper W. and Brooks, Mollie and Comita, Liza and Crone, Elizabeth and Cubaynes, Sarah and Davies, Trevor and de Valpine, Perry and Ford, Jessica and Gimenez, Olivier and Kéry, Marc and Kim, Eun Jung and Lennert-Cody, Cleridy and Magnusson, Arni and Martell, Steve and Nash, John and Nielsen, Anders and Regetz, Jim and Skaug, Hans and Zipkin, Elise},
	editor = {Ramula, Satu},
	month = jun,
	year = {2013},
	pages = {501--512}
}


@article{quinones_cryptic_2015,
	author = {Quiñones, A. E. and Wcislo, W. T.},
	title = {Cryptic extended brood care in the facultatively eusocial sweat bee {{\em Megalopta genalis}}},
	journal = {Insectes Sociaux},
	year = {2015},
	month = aug,
	volume = {62},
	number = {3},
	pages = {307--313},
	issn = {0020-1812, 1420-9098},
	doi = {10.1007/s00040-015-0409-3},
	url = {http://link.springer.com/10.1007/s00040-015-0409-3},
	language = {en}
}

@article{lo_transform_2015,
	title = {To transform or not to transform: using generalized linear mixed models to analyse reaction time data},
	volume = {6},
	issn = {1664-1078},
	shorttitle = {To transform or not to transform},
	url = {http://journal.frontiersin.org/article/10.3389/fpsyg.2015.01171/abstract},
	doi = {10.3389/fpsyg.2015.01171},
	abstract = {Linear mixed-effect models (LMMs) are being increasingly widely used in psychology to analyse multi-level research designs. This feature allows LMMs to address some of the problems identified by Speelman and McGann (2013) about the use of mean data, because they do not average across individual responses. However, recent guidelines for using LMM to analyse skewed reaction time (RT) data collected in many cognitive psychological studies recommend the application of non-linear transformations to satisfy assumptions of normality. Uncritical adoption of this recommendation has important theoretical implications which can yield misleading conclusions. For example, Balota, Aschenbrenner and Yap (2013) showed that analyses of raw RT produced additive effects of word frequency and stimulus quality on word identification, which conflicted with the interactive effects observed in analyses of transformed RT. Generalized linear mixed-effect models (GLMM) provide a solution to this problem by satisfying normality assumptions without the need for transformation. This allows differences between individuals to be properly assessed, using the metric most appropriate to the researcher’s theoretical context. We outline the major theoretical decisions involved in specifying a GLMM, and illustrate them by reanalysing Balota et al.’s datasets. We then consider the broader benefits of using GLMM to investigate individual differences.},
	language = {English},
	urldate = {2017-01-03},
	journal = {Frontiers in Psychology},
	author = {Lo, Steson and Andrews, Sally},
	year = {2015},
	keywords = {Additive factors, generalized linear mixed-effect models, interaction effects, mental chronometry, RT transformations},
	pages = {1171}
}


@book{mcelreath_statistical_2015,
	address = {Boca Raton},
	title = {Statistical {Rethinking}: {A} {Bayesian} {Course} with {Examples} in {R} and {Stan}},
	isbn = {978-1-4822-5344-3},
	shorttitle = {Statistical {Rethinking}},
	abstract = {Statistical Rethinking: A Bayesian Course with Examples in R and Stan builds readers’ knowledge of and confidence in statistical modeling. Reflecting the need for even minor programming in today’s model-based statistics, the book pushes readers to perform step-by-step calculations that are usually automated. This unique computational approach ensures that readers understand enough of the details to make reasonable choices and interpretations in their own modeling work.  The text presents generalized linear multilevel models from a Bayesian perspective, relying on a simple logical interpretation of Bayesian probability and maximum entropy. It covers from the basics of regression to multilevel models. The author also discusses measurement error, missing data, and Gaussian process models for spatial and network autocorrelation.  By using complete R code examples throughout, this book provides a practical foundation for performing statistical inference. Designed for both PhD students and seasoned professionals in the natural and social sciences, it prepares them for more advanced or specialized statistical modeling.  Web Resource: The book is accompanied by an R package (rethinking) that is available on the author’s website and GitHub. The two core functions (map and map2stan) of this package allow a variety of statistical models to be constructed from standard model formulas.},
	language = {English},
	publisher = {Chapman and Hall/CRC},
	author = {McElreath, Richard},
	month = dec,
	year = {2015}
}

@book{faraway_extending_2016,
	title = {Extending the {Linear} {Model} with {R}: {Generalized} {Linear}, {Mixed} {Effects} and {Nonparametric} {Regression} {Models}},
	publisher = {Chapman \& Hall/CRC},
	author = {Faraway, Julian J.},
	edition = {Second},
	year = {2016}
}

@book{faraway_extending_2006,
	title = {Extending the {Linear} {Model} with {R}: {Generalized} {Linear}, {Mixed} {Effects} and {Nonparametric} {Regression} {Models}},
	publisher = {Chapman \& Hall/CRC},
	author = {Faraway, Julian J.},
	year = {2006}
}

@article{baird_performance_2016,
	title = {Performance of time-varying predictors in multilevel models under an assumption of fixed or random effects},
	volume = {21},
	issn = {1939-1463, 1082-989X},
	url = {http://doi.apa.org/getdoi.cfm?doi=10.1037/met0000070},
	doi = {10.1037/met0000070},
	language = {en},
	number = {2},
	urldate = {2017-05-09},
	journal = {Psychological Methods},
	author = {Baird, Rachel and Maxwell, Scott E.},
	year = {2016},
	pages = {175--188}
}


@misc{eager_mixed_2017,
	title = {Mixed {Effects} {Models} are {Sometimes} {Terrible}},
	howpublished = {arXiv preprint},
	eprint = {1701.04858},
	archiveprefix = {arXiv},
	url = {https://arxiv.org/abs/1701.04858},
	urldate = {2017-05-07},
	author = {Eager, Christopher and Roy, Joseph},
	year = {2017}
}

 @article{bates_fitting_2015,
	title = {Fitting Linear Mixed-Effects Models Using {lme4}},
	volume = {67},
	doi = {10.18637/jss.v067.i01},
	number = {1},
	journal = {Journal of Statistical Software},
	author = {Bates, Douglas and M{\"a}chler, Martin and Bolker, Benjamin M. and Walker, Steven C.},
	year = {2015},
	pages = {1--48}
}


@book{angrist_mostly_2009,
	address = {Princeton},
	edition = {First},
	title = {Mostly {Harmless} {Econometrics}: {An} {Empiricist}'s {Companion}},
	isbn = {978-0-691-12035-5},
	shorttitle = {Mostly {Harmless} {Econometrics}},
	abstract = {The core methods in today's econometric toolkit are linear regression for statistical control, instrumental variables methods for the analysis of natural experiments, and differences-in-differences methods that exploit policy changes. In the modern experimentalist paradigm, these techniques address clear causal questions such as: Do smaller classes increase learning? Should wife batterers be arrested? How much does education raise wages? Mostly Harmless Econometrics shows how the basic tools of applied econometrics allow the data to speak.  In addition to econometric essentials, Mostly Harmless Econometrics covers important new extensions--regression-discontinuity designs and quantile regression--as well as how to get standard errors right. Joshua Angrist and Jörn-Steffen Pischke explain why fancier econometric techniques are typically unnecessary and even dangerous. The applied econometric methods emphasized in this book are easy to use and relevant for many areas of contemporary social science.  An irreverent review of econometric essentials  A focus on tools that applied researchers use most  Chapters on regression-discontinuity designs, quantile regression, and standard errors  Many empirical examples  A clear and concise resource with wide applications},
	language = {English},
	publisher = {Princeton University Press},
	author = {Angrist, Joshua D. and Pischke, Jörn-Steffen},
	month = jan,
	year = {2009}
}

@article{Hurlbert1984,
  author = {Hurlbert, Stuart H.},
  title = {Pseudoreplication and the Design of Ecological Field Experiments},
  journal = {Ecological Monographs},
  year = {1984},
  volume = {54},
  number = {2},
  pages = {187--211},
  doi = {10.2307/1942661}
}


@article{harrison_comparison_2015,
	author = {Harrison, Xavier A.},
	title = {A comparison of observation-level random effect and {Beta}-{Binomial} models for modelling overdispersion in {Binomial} data in ecology and evolution},
	journal = {PeerJ},
	year = {2015},
	month = jul,
	volume = {3},
	pages = {e1114},
	issn = {2167-8359},
	doi = {10.7717/peerj.1114},
	url = {https://peerj.com/articles/1114},
	urldate = {2015-07-21},
	language = {en}
}

@article{harrison_using_2014,
	author = {Harrison, Xavier A.},
	title = {Using observation-level random effects to model overdispersion in count data in ecology and evolution},
	journal = {PeerJ},
	year = {2014},
	month = oct,
	volume = {2},
	pages = {e616},
	issn = {2167-8359},
	doi = {10.7717/peerj.616},
	url = {https://peerj.com/articles/616},
	urldate = {2015-08-10},
	language = {en}
}


@article{gonzales-barron_comparison_2011,
	author = {Gonzales-Barron, Ursula and Butler, Francis},
	title = {A comparison between the discrete {Poisson}-gamma and {Poisson}-lognormal distributions to characterise microbial counts in foods},
	journal = {Food Control},
	year = {2011},
	month = aug,
	volume = {22},
	number = {8},
	pages = {1279--1286},
	issn = {0956-7135},
	doi = {10.1016/j.foodcont.2011.01.029},
	url = {http://www.sciencedirect.com/science/article/pii/S0956713511000491},
	urldate = {2017-11-22},
	keywords = {Distributions, Gamma, Lognormal, Microbial counts, Negative binomial, Poisson-gamma, Poisson-lognormal},
	abstract = {The choice of statistical distributions characterising microbial counts is essential in risk assessment and risk management. While the lognormal distribution has been long used to directly model the microbial data obtained from food samples, it does not allow for complete absence of microorganisms in a sample. Within a heterogeneous Poisson theoretical interpretation, a gamma or a lognormal population distribution for the unknown microbial concentration and a Poisson measurement distribution produces a discrete Poisson-gamma (λ, 1/k) or a Poisson-lognormal (μ,σ) distribution of observed plate counts. The capability of both distributions to deal with clustering was compared using six data sets of variable proportion of zero counts: total viable counts, coliforms and Escherichia coli on pre-chill and post-chill beef carcasses. Whereas the Poisson-lognormal distribution fitted better to the high counts data sets, the Poisson-gamma distribution represented the low counts data sets (13–81\% zero counts) by far better than the Poisson-lognormal – which invariably tended to have a longer tail, an overestimated mean log and a lower predicted probability of zero counts. The inverse close relationship between the observed proportion of zero counts in the data set and the fitted dispersion factor 1/k suggested the possibility of obtaining a first approximation of 1/k by this means. Finally, in absence of zero counts, it was demonstrated that fitting a Poisson-lognormal to the observed plate count data can be closely approximated by the common practice of fitting a simple normal distribution to the back-calculated ‘unobserved’ mean concentrations in log CFU/g.}
}


@misc{sweetser_diamond_2017,
	author = {Sweetser, Timothy},
	title = {diamond: {Python} solver for mixed-effects models},
	shorttitle = {diamond},
	url = {https://github.com/stitchfix/diamond},
	urldate = {2017-11-28},
	publisher = {Stitch Fix Technology},
	month = nov,
	year = {2017},
	note = {original-date: 2017-08-07T19:06:10Z}
}

@misc{bates_mixedmodels_2017,
	author = {Bates, Douglas},
	title = {{MixedModels}.jl: {A} {Julia} package for fitting (statistical) mixed-effects models},
	shorttitle = {{MixedModels}.jl},
	year = {2017},
	month = nov,
	url = {https://github.com/dmbates/MixedModels.jl},
	urldate = {2017-11-28},
	note = {original-date: 2013-03-29T21:24:25Z},
	keywords = {julia, mixed-effects, mixed-models, statistical-models}
}


@article{bhattacharya_sparse_2011,
	author = {Bhattacharya, A. and Dunson, D. B.},
	title = {Sparse {Bayesian} infinite factor models},
	journal = {Biometrika},
	year = {2011},
	month = jun,
	volume = {98},
	number = {2},
	pages = {291--306},
	issn = {0006-3444},
	doi = {10.1093/biomet/asr013},
	pmid = {23049129},
	pmcid = {PMC3419391},
	url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3419391/},
	urldate = {2017-11-28},
	abstract = {We focus on sparse modelling of high-dimensional covariance matrices using Bayesian latent factor models. We propose a multiplicative gamma process shrinkage prior on the factor loadings which allows introduction of infinitely many factors, with the loadings increasingly shrunk towards zero as the column index increases. We use our prior on a parameter-expanded loading matrix to avoid the order dependence typical in factor analysis models and develop an efficient Gibbs sampler that scales well as data dimensionality increases. The gain in efficiency is achieved by the joint conjugacy property of the proposed prior, which allows block updating of the loadings matrix. We propose an adaptive Gibbs sampler for automatically truncating the infinite loading matrix through selection of the number of important factors. Theoretical results are provided on the support of the prior and truncation approximation bounds. A fast algorithm is proposed to produce approximate Bayes estimates. Latent factor regression methods are developed for prediction and variable selection in applications with high-dimensional correlated predictors. Operating characteristics are assessed through simulation studies, and the approach is applied to predict survival times from gene expression data.}
}


@article{carpenter_stan_2016,
	title = {Stan: {A} probabilistic programming language},
	volume = {76},
	number = {1},
	shorttitle = {Stan},
	url = {http://www.stat.columbia.edu/~gelman/research/unpublished/stan-resubmit-JSS1293.pdf},
	doi = {10.18637/jss.v076.i01},
	urldate = {2017-06-20},
	journal = {Journal of Statistical Software},
	author = {Carpenter, Bob and Gelman, Andrew and Hoffman, Matt and Lee, Daniel and Goodrich, Ben and Betancourt, Michael and Brubaker, Michael A. and Guo, Jiqiang and Li, Peter and Riddell, Allen},
	year = {2017},
	pages = {1--32}
}


@article{burkner_brms_2017,
	title = {brms: {An} {R} {Package} for {Bayesian} {Multilevel} {Models} {Using} {Stan}},
	volume = {80},
	shorttitle = {brms},
	url = {https://www.jstatsoft.org/article/view/v080i01},
	doi = {10.18637/jss.v080.i01},
	number = {1},
	urldate = {2017-11-28},
	journal = {Journal of Statistical Software},
	author = {Bürkner, Paul-Christian},
	year = {2017},
	keywords = {Bayesian inference, R, MCMC, multilevel model, ordinal data, Stan},
	pages = {1--28}
}

@book{wood_generalized_2017,
  author =	 {Wood, Simon},
  title = 	 {Generalized Additive Models: {An} Introduction with {R}},
  publisher = 	 {Chapman \& Hall},
  year = 	 {2017},
  series =	 {CRC Texts in Statistical Science},
  edition = {Second}
}

@article{brooks_modeling_2017,
	title = {{glmmTMB} balances speed and flexibility among packages for zero-inflated generalized linear mixed modeling},
	author = {Mollie E. Brooks and Kasper Kristensen and Koen J. {van Benthem} and Arni Magnusson and Casper W. Berg and Anders Nielsen and Hans J. Skaug and Martin M{\"a}chler and Benjamin M. Bolker},
	journal = {The R Journal},
	year = {2017},
	volume = {9},
	number = {2},
	pages = {378--400},
	doi = {10.32614/RJ-2017-066},
	url = {https://journal.r-project.org/archive/2017/RJ-2017-066/RJ-2017-066.pdf},
	abstract = {Ecological phenomena are often measured in the form of count data. These data can be analyzed using generalized linear mixed models (GLMMs) when observations are correlated in ways that require random effects. However, count data are often zero-inflated, containing more zeros than would be expected from the standard error distributions used in GLMMs, e.g., parasite counts may be exactly zero for hosts with effective immune defenses but vary according to a negative binomial distribution for non-resistant hosts. We present a new R package, glmmTMB, that increases the range of models that can easily be fitted to count data using maximum likelihood estimation. The interface was developed to be familiar to users of the lme4 R package, a common tool for fitting GLMMs. To maximize speed and flexibility, estimation is done using Template Model Builder (TMB), utilizing automatic differentiation to estimate model gradients and the Laplace approximation for handling random effects. We demonstrate glmmTMB and compare it to other available methods using two ecological case studies. In general, glmmTMB is more flexible than other packages available for estimating zero-inflated models via maximum likelihood estimation and is faster than packages that use Markov chain Monte Carlo sampling for estimation; it is also more flexible for zero-inflated modelling than INLA, but speed comparisons vary with model and data structure. Our package can be used to fit GLMs and GLMMs with or without zero-inflation as well as hurdle models. By allowing ecologists to quickly estimate a wide variety of models using a single package, glmmTMB makes it easier to find appropriate models and test hypotheses to describe ecological processes.}
}

@inproceedings{stroup_non-normal_2013,
	address = {Manhattan, KS},
	title = {Non-normal data in agricultural experiments},
	booktitle = {Conference on Applied Statistics in Agriculture},
	organization = {Kansas State University},
	url = {http://newprairiepress.org/agstatconference/2013/proceedings/8},
	author = {Stroup, W. W.},
	year = {2013}
}


@article{wolfinger_generalized_1993,
	title = {Generalized linear mixed models: a pseudo-likelihood approach},
	volume = {48},
	issn = {0094-9655, 1563-5163},
	url = {http://www.tandfonline.com/doi/abs/10.1080/00949659308811554},
	doi = {10.1080/00949659308811554},
	language = {en},
	number = {3-4},
	urldate = {2018-02-19},
	journal = {Journal of Statistical Computation and Simulation},
	author = {Wolfinger, Russ and O'Connell, Michael},
	month = dec,
	year = {1993},
	pages = {233--243}
}


@article{warton_many_2005,
	title = {Many zeros does not mean zero inflation: comparing the goodness-of-fit of parametric models to multivariate abundance data},
	volume = {16},
	shorttitle = {Many zeros does not mean zero inflation},
	url = {http://dx.doi.org/10.1002/env.702},
	doi = {10.1002/env.702},
	abstract = {An important step in studying the ecology of a species is choosing a statistical model of abundance; however, there has been little general consideration of which statistical model to use. In particular, abundance data have many zeros (often 50-80 per cent of all values), and zero-inflated count distributions are often used to specifically model the high frequency of zeros in abundance data. However, in such cases it is often taken for granted that a zero-inflated model is required, and the goodness-of-fit to count distributions with and without zero inflation is not often compared for abundance data. In this article, the goodness-of-fit was compared for several marginal models of abundance in 20 multivariate datasets (a total of 1672 variables across all datasets) from different sources. Multivariate abundance data are quite commonly collected in applied ecology, and the properties of these data may differ from abundances collected in autecological studies. Goodness-of-fit was assessed using AIC values, graphs of observed vs expected proportion of zeros in a dataset, and graphs of the sample mean-variance relationship. The negative binomial model was the best fitting of the count distributions, without zero-inflation. The high frequency of zeros was well described by the systematic component of the model (i.e. at some places predicted abundance was high, while at others it was zero) and so it was rarely necessary to modify the random component of the model (i.e. fitting a zero-inflated distribution). A Gaussian model based on transformed abundances fitted data surprisingly well, and rescaled per cent cover was usually poorly fitted by a count distribution. 
In conclusion, results suggest that the high frequency of zeros commonly seen in multivariate abundance data is best considered to come from distributions where mean abundance is often very low (hence there are many zeros), as opposed to claiming that there are an unusually high number of zeros compared to common parametric distributions. Copyright © 2005 John Wiley \& Sons, Ltd.},
	number = {3},
	urldate = {2008-05-28},
	journal = {Environmetrics},
	author = {Warton, David I.},
	year = {2005},
	pages = {275--289}
}


@article{martin_zero_2005,
	title = {Zero tolerance ecology: improving ecological inference by modelling the source of zero observations},
	volume = {8},
	issn = {1461-023X},
	shorttitle = {Zero tolerance ecology},
	url = {http://doi.wiley.com/10.1111/j.1461-0248.2005.00826.x},
	doi = {10.1111/j.1461-0248.2005.00826.x},
	language = {en},
	number = {11},
	urldate = {2018-02-27},
	journal = {Ecology Letters},
	author = {Martin, Tara G. and Wintle, Brendan A. and Rhodes, Jonathan R. and Kuhnert, Petra M. and Field, Scott A. and Low-Choy, Samantha J. and Tyre, Andrew J. and Possingham, Hugh P.},
	month = nov,
	year = {2005},
	pages = {1235--1246}
}


@incollection{zuur_zero-truncated_2009,
	address = {New York, NY},
	title = {Zero-{Truncated} and {Zero}-{Inflated} {Models} for {Count} {Data}},
	isbn = {978-0-387-87457-9 978-0-387-87458-6},
	url = {http://www.springerlink.com/content/m087275807178771/},
	urldate = {2011-11-24},
	booktitle = {Mixed Effects Models and Extensions in Ecology with {R}},
	publisher = {Springer New York},
	author = {Zuur, Alain F. and Ieno, Elena N. and Walker, Neil J. and Saveliev, Anatoly A. and Smith, Graham M.},
	collaborator = {Zuur, Alain F. and Ieno, Elena N. and Walker, Neil and Saveliev, Anatoly A. and Smith, Graham M.},
	year = {2009},
	pages = {261--293}
}

@book{cleveland_visualizing_1993,
	author = {Cleveland, William},
	title = {Visualizing {Data}},
	publisher = {Hobart Press},
	address = {Summit, NJ},
	year = {1993}
}

@article{johnson_power_2015,
	author = {Johnson, Paul C. D. and Barry, Sarah J. E. and Ferguson, Heather M. and Müller, Pie},
	title = {Power analysis for generalized linear mixed models in ecology and evolution},
	journal = {Methods in Ecology and Evolution},
	year = {2015},
	month = feb,
	volume = {6},
	number = {2},
	pages = {133--142},
	issn = {2041-210X},
	doi = {10.1111/2041-210X.12306},
	url = {https://besjournals.onlinelibrary.wiley.com/doi/abs/10.1111/2041-210X.12306},
	urldate = {2018-03-20},
	language = {en}
}

@article{kain_practical_2015,
	author = {Kain, Morgan P. and Bolker, Ben M. and McCoy, Michael W.},
	title = {A practical guide and power analysis for {GLMMs}: detecting among treatment variation in random effects},
	shorttitle = {A practical guide and power analysis for {GLMMs}},
	journal = {PeerJ},
	year = {2015},
	month = sep,
	volume = {3},
	pages = {e1226},
	issn = {2167-8359},
	doi = {10.7717/peerj.1226},
	url = {https://peerj.com/articles/1226},
	urldate = {2018-03-20},
	language = {en},
	abstract = {In ecology and evolution generalized linear mixed models (GLMMs) are becoming increasingly used to test for differences in variation by treatment at multiple hierarchical levels. Yet, the specific sampling schemes that optimize the power of an experiment to detect differences in random effects by treatment/group remain unknown. In this paper we develop a blueprint for conducting power analyses for GLMMs focusing on detecting differences in variance by treatment. We present parameterization and power analyses for random-intercepts and random-slopes GLMMs because of their generality as focal parameters for most applications and because of their immediate applicability to emerging questions in the field of behavioral ecology. We focus on the extreme case of hierarchically structured binomial data, though the framework presented here generalizes easily to any error distribution model. First, we determine the optimal ratio of individuals to repeated measures within individuals that maximizes power to detect differences by treatment in among-individual variation in intercept, among-individual variation in slope, and within-individual variation in intercept. Second, we explore how power to detect differences in target variance parameters is affected by total variation. Our results indicate heterogeneity in power across ratios of individuals to repeated measures with an optimal ratio determined by both the target variance parameter and total sample size. Additionally, power to detect each variance parameter was low overall (in most cases {\textgreater}1,000 total observations per treatment needed to achieve 80\% power) and decreased with increasing variance in non-target random effects. With growing interest in variance as the parameter of inquiry, these power analyses provide a crucial component for designing experiments focused on detecting differences in variance. 
We hope to inspire novel experimental designs in ecology and evolution investigating the causes and implications of individual-level phenotypic variance, such as the adaptive significance of within-individual variation.}
}


@article{snijders_standard_1993,
	title = {Standard {Errors} and {Sample} {Sizes} for {Two}-{Level} {Research}},
	volume = {18},
	issn = {0362-9791},
	url = {http://www.jstor.org/stable/1165134?origin=crossref},
	doi = {10.2307/1165134},
	number = {3},
	urldate = {2018-03-20},
	journal = {Journal of Educational Statistics},
	author = {Snijders, Tom A. B. and Bosker, Roel J.},
	year = {1993},
	pages = {237--259}
}


@article{muff_stefanie_marginal_2016,
	title = {Marginal or conditional regression models for correlated non-normal data?},
	volume = {7},
	issn = {2041-210X},
	url = {https://besjournals.onlinelibrary.wiley.com/doi/abs/10.1111/2041-210X.12623},
	doi = {10.1111/2041-210X.12623},
	abstract = {Summary Correlated data are ubiquitous in ecological and evolutionary research, and appropriate statistical analysis requires that these correlations are taken into account. For regressions with correlated, non-normal outcomes, two main approaches are used: conditional and marginal modelling. The former leads to generalized linear mixed models (GLMMs), while the latter are estimated using generalized estimating equations (GEEs), or marginalized multilevel regression models. Differences, advantages and drawbacks of conditional and marginal models have been discussed extensively in the statistical and applied literature, and there is some agreement that the choice of the model must depend on the question under study. Yet, there still appears to be a lot of confusion and disagreement over when to choose which model. We start with a review of conditional and marginal models, and the differences in the interpretation of the resulting parameter estimates. We highlight that the two types of models propagate different linear relations between the covariates and the response. Moreover, while conditional models explicitly account for heterogeneity among clustered observations, marginal models yield averages over such heterogeneities and are therefore often interpreted as population-averaged models. We point out theoretically and with an example that when modelling non-normal outcomes no unambiguous definition of a marginal model generally exists. Instead, marginal model parameters are marginal only with respect to unaccounted differences among clusters and thus depend on the fixed effects in the model. Therefore, marginal model parameters should not be loosely interpreted as population-averaged parameters. In addition, we explain how marginal modelling is mathematically analogous to deliberately omitting covariates with explanatory power, and to deliberately introducing a Berkson measurement error into covariates. 
We also reiterate that marginal modelling is related to a well-known statistical phenomenon, the Simpson's paradox. In most cases, therefore, we regard the conditional model as the more powerful choice to explain how covariates are associated with a non-normal response. Still, marginal models can be useful, given that the scientific question explicitly requires such a model formulation.},
	number = {12},
	urldate = {2018-03-20},
	journal = {Methods in Ecology and Evolution},
	author = {Muff, Stefanie and Held, Leonhard and Keller, Lukas F. and Matthiopoulos, Jason},
	month = aug,
	year = {2016},
	keywords = {attenuation, Berkson measurement error, conditional model, generalized estimating equations, generalized linear mixed model, omitted covariates, Simpson's paradox},
	pages = {1514--1524}
}


@article{schoener_nonsynchronous_1970,
	title = {Nonsynchronous {Spatial} {Overlap} of {Lizards} in {Patchy} {Habitats}},
	volume = {51},
	issn = {0012-9658},
	url = {https://esajournals.onlinelibrary.wiley.com/doi/abs/10.2307/1935376},
	doi = {10.2307/1935376},
	abstract = {Sympatric native Anolis species with similar structural habitats but contrasting climatic habitats are closer in head and body size on species-rich than on depauperate islands. In two localities, sympatric Anolis species with differential occurrences in sun or shade sought lower, more shaded perches during midday, resulting in partly nonsynchronous utilization of the vegetation by the two species. The second observation may be related to the first in the following way: nonsynchronous spatial overlap could dictate relatively great resource overlap for species coinhabiting patchy or edge areas, requiring great differences between the species in prey size in addition to those in climatic habitat. The extent of such overlap on small depauperate islands could be greater if these contained a greater proportion of patchy or edge habitats (with respect to insolation), or if climatic preferences were broader and more overlapping than on large, species-rich islands. In each locality, the relatively more shade-inhabiting species occurred more often on larger perches and on lower perches than did the other species. In both species of the Bermudan pair, adult males occupied higher and larger perches, and in grahami, shadier perches, than did female-sized individuals. The statistical significance of these and other differences was evaluated using several unweighted g2 procedures, Cochran's weighted g2 test and a partitioning technique for analyzing interactions among variables in complex contingency tables. The last method is described in detail in the paper by Fienberg, immediately following this one.},
	number = {3},
	urldate = {2018-04-21},
	journal = {Ecology},
	author = {Schoener, Thomas W.},
	month = may,
	year = {1970},
	pages = {408--418}
}


@article{steele_immunization_1996,
	author = {Steele, F. and Diamond, I. and Amin, S.},
	title = {Immunization uptake in rural {Bangladesh}: a multilevel analysis},
	journal = {Journal of the Royal Statistical Society A},
	year = {1996},
	volume = {159},
	pages = {289--299}
}


@article{gelman_difference_2006,
	author = {Gelman, Andrew and Stern, Hal},
	title = {The {Difference} {Between} “{Significant}” and “{Not} {Significant}” is not {Itself} {Statistically} {Significant}},
	journal = {The American Statistician},
	year = {2006},
	month = nov,
	volume = {60},
	number = {4},
	pages = {328--331},
	issn = {0003-1305, 1537-2731},
	doi = {10.1198/000313006X152649},
	url = {http://www.tandfonline.com/doi/abs/10.1198/000313006X152649},
	urldate = {2015-11-10},
	language = {en}
}


@article{dushoff_mortality_2006,
	author = {Dushoff, Jonathan and Plotkin, Joshua B. and Viboud, Cecile and Earn, David J. D. and Simonsen, Lone},
	title = {Mortality due to {Influenza} in the {United} {States}—{An} {Annualized} {Regression} {Approach} {Using} {Multiple}-{Cause} {Mortality} {Data}},
	journal = {American Journal of Epidemiology},
	year = {2006},
	month = jan,
	volume = {163},
	number = {2},
	pages = {181--187},
	issn = {0002-9262, 1476-6256},
	doi = {10.1093/aje/kwj024},
	pmid = {16319291},
	url = {http://aje.oxfordjournals.org/content/163/2/181},
	urldate = {2014-01-09},
	language = {en},
	keywords = {Temperature, NAD, normalized annual death series, United States, time series, Seasons, Tthresh, threshold temperature, mortality, INFLUENZA, cause of death, linear regression},
	abstract = {Influenza is an important cause of mortality in temperate countries, but there is substantial controversy as to the total direct and indirect mortality burden imposed by influenza viruses. The authors have extracted multiple-cause death data from public-use data files for the United States from 1979 to 2001. The current research reevaluates attribution of deaths to influenza, by use of an annualized regression approach: comparing measures of excess deaths with measures of influenza virus prevalence by subtype over entire influenza seasons and attributing deaths to influenza by a regression model. This approach is more conservative in its assumptions than is earlier work, which used weekly regression models, or models based on fitting baselines, but it produces results consistent with these other methods, supporting the conclusion that influenza is an important cause of seasonal excess deaths. The regression model attributes an annual average of 41,400 (95\% confidence interval: 27,100, 55,700) deaths to influenza over the period 1979–2001. The study also uses regional death data to investigate the effects of cold weather on annualized excess deaths.}
}

@article{laird_random-effects_1982,
	author = {Laird, Nan M. and Ware, James H.},
	title = {Random-{Effects} {Models} for {Longitudinal} {Data}},
	journal = {Biometrics},
	year = {1982},
	volume = {38},
	number = {4},
	pages = {963--974},
	issn = {0006-341X},
	doi = {10.2307/2529876},
	url = {http://www.jstor.org/stable/2529876},
	urldate = {2018-06-19},
	abstract = {Models for the analysis of longitudinal data must recognize the relationship between serial observations on the same unit. Multivariate models with general covariance structure are often difficult to apply to highly unbalanced data, whereas two-stage random-effects models can be used easily. In two-stage models, the probability distributions for the response vectors of different individuals belong to a single family, but some random-effects parameters vary across individuals, with a distribution specified at the second stage. A general family of models is discussed, which includes both growth models and repeated-measures models as special cases. A unified approach to fitting these models, based on a combination of empirical Bayes and maximum likelihood estimation of model parameters and using the EM algorithm, is discussed. Two examples are taken from a current epidemiological study of the health effects of air pollution.}
}


@article{greenland_penalization_2015,
	author = {Greenland, Sander and Mansournia, Mohammad Ali},
	title = {Penalization, bias reduction, and default priors in logistic and related categorical and survival regressions},
	journal = {Statistics in Medicine},
	year = {2015},
	month = oct,
	volume = {34},
	number = {23},
	pages = {3133--3143},
	issn = {1097-0258},
	doi = {10.1002/sim.6537},
	url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/sim.6537},
	urldate = {2018-04-25},
	language = {en},
	copyright = {Copyright © 2015 John Wiley \& Sons, Ltd.},
	keywords = {maximum likelihood, penalized likelihood, Bayes estimators, bias correction, Firth bias reduction, Jeffreys prior, logistic regression, regularization, shrinkage, sparse data, stabilization},
	abstract = {Penalization is a very general method of stabilizing or regularizing estimates, which has both frequentist and Bayesian rationales. We consider some questions that arise when considering alternative penalties for logistic regression and related models. The most widely programmed penalty appears to be the Firth small-sample bias-reduction method (albeit with small differences among implementations and the results they provide), which corresponds to using the log density of the Jeffreys invariant prior distribution as a penalty function. The latter representation raises some serious contextual objections to the Firth reduction, which also apply to alternative penalties based on t-distributions (including Cauchy priors). Taking simplicity of implementation and interpretation as our chief criteria, we propose that the log-F(1,1) prior provides a better default penalty than other proposals. Penalization based on more general log-F priors is trivial to implement and facilitates mean-squared error reduction and sensitivity analyses of penalty strength by varying the number of prior degrees of freedom. We caution however against penalization of intercepts, which are unduly sensitive to covariate coding and design idiosyncrasies. Copyright © 2015 John Wiley \& Sons, Ltd.}
}

@article{Crome+1996,
	author = {Crome, F. H. J. and Thomas, M. R. and Moore, L. A.},
	title = {A Novel {Bayesian} Approach to Assessing Impacts of Rain Forest Logging},
	journal = {Ecological Applications},
	year = {1996},
	volume = {6},
	pages = {1104--1123}
}

@misc{carpenter_computational_2017,
	author = {Carpenter, Bob},
	title = {Computational and statistical issues with uniform interval priors},
	journal = {Statistical Modeling, Causal Inference, and Social Science},
	year = {2017},
	month = nov,
	url = {http://andrewgelman.com/2017/11/28/computational-statistical-issues-uniform-interval-priors/},
	urldate = {2018-05-15},
	language = {en-US},
	abstract = {There are two anti-patterns* for prior specification in Stan programs that can be sourced directly to idioms developed for BUGS. One is the diffuse gamma priors that Andrew’s already written about at length. The second is interval-based priors. Which brings us to today’s post. Interval priors An interval prior is something like this in Stan …}
}


@article{lewandowski_generating_2009,
	author = {Lewandowski, Daniel and Kurowicka, Dorota and Joe, Harry},
	title = {Generating random correlation matrices based on vines and extended onion method},
	journal = {Journal of Multivariate Analysis},
	year = {2009},
	month = oct,
	volume = {100},
	number = {9},
	pages = {1989--2001},
	issn = {0047-259X},
	doi = {10.1016/j.jmva.2009.04.008},
	url = {http://www.sciencedirect.com/science/article/pii/S0047259X09000876},
	urldate = {2018-07-27},
	keywords = {Correlation matrix, Dependence vines, Onion method, Partial correlation},
	abstract = {We extend and improve two existing methods of generating random correlation matrices, the onion method of Ghosh and Henderson [S. Ghosh, S.G. Henderson, Behavior of the norta method for correlated random vector generation as the dimension increases, ACM Transactions on Modeling and Computer Simulation (TOMACS) 13 (3) (2003) 276–294] and the recently proposed method of Joe [H. Joe, Generating random correlation matrices based on partial correlations, Journal of Multivariate Analysis 97 (2006) 2177–2189] based on partial correlations. The latter is based on the so-called D-vine. We extend the methodology to any regular vine and study the relationship between the multiple correlation and partial correlations on a regular vine. We explain the onion method in terms of elliptical distributions and extend it to allow generating random correlation matrices from the same joint distribution as the vine method. The methods are compared in terms of time necessary to generate 5000 random correlation matrices of given dimensions.}
}


@article{gelman_statistical_2014,
	author = {Gelman, Andrew and Loken, Eric},
	title = {The {Statistical} {Crisis} in {Science}},
	journal = {American Scientist},
	year = {2014},
	volume = {102},
	number = {6},
	pages = {460--465},
	url = {https://search.proquest.com/openview/a83b0c4daa7508482a6b7e47eb7b8a3e/1?pq-origsite=gscholar&cbl=40798},
	urldate = {2018-07-28},
	language = {en}
}


@article{gelman_scaling_2008,
	title = {Scaling regression inputs by dividing by two standard deviations},
	volume = {27},
	issn = {0277-6715, 1097-0258},
	url = {http://doi.wiley.com/10.1002/sim.3107},
	doi = {10.1002/sim.3107},
	language = {en},
	number = {15},
	urldate = {2018-02-04},
	journal = {Statistics in Medicine},
	author = {Gelman, Andrew},
	month = jul,
	year = {2008},
	pages = {2865--2873}
}


@misc{singmann_compute_2018,
	author = {Singmann, Henrik},
	title = {Compute effect sizes for mixed() objects},
	journal = {afex: Analysis of Factorial EXperiments},
	year = {2018},
	month = jul,
	url = {https://afex.singmann.science/forums/topic/compute-effect-sizes-for-mixed-objects},
	urldate = {2018-07-31}
}

@article{rights_quantifying_2018,
	title = {Quantifying explained variance in multilevel models: {An} integrative framework for defining {R}-squared measures},
	doi = {10.1037/met0000184},
	abstract = {Researchers often mention the utility and need for R-squared measures of explained variance for multilevel models (MLMs). Although this topic has been addressed by methodologists, the MLM R-squared literature suffers from several shortcomings: (a) analytic relationships among existing measures have not been established so measures equivalent in the population have been redeveloped 2 or 3 times; (b) a completely full partitioning of variance has not been used to create measures, leading to gaps in the availability of measures to address key substantive questions; (c) a unifying approach to interpreting and choosing among measures has not been provided, leading to researchers’ difficulty with implementation; and (d) software has inconsistently and infrequently incorporated available measures. We address these issues with the following contributions. We develop an integrative framework of R-squared measures for MLMs with random intercepts and/or slopes based on a completely full decomposition of variance. We analytically relate 10 existing measures from different disciplines as special cases of 5 measures from our framework. We show how our framework fills gaps by supplying additional total and level-specific measures that answer new substantive research questions. To facilitate interpretation, we provide a novel and integrative graphical representation of all the measures in the framework; we use it to demonstrate limitations of current reporting practices for MLM R-squareds, as well as benefits of considering multiple measures from the framework in juxtaposition. We supply and empirically illustrate an R function, r2MLM, that computes all measures in our framework to help researchers in considering effect size and conveying practical significance.},
	journal = {Psychological Methods},
	author = {Rights, Jason D. and Sterba, Sonya K.},
	year = {2018}
}


@article{leemis_univariate_2008,
	title = {Univariate {Distribution} {Relationships}},
	volume = {62},
	issn = {0003-1305},
	url = {http://pubs.amstat.org/doi/abs/10.1198/000313008X270448},
	doi = {10.1198/000313008X270448},
	number = {1},
	urldate = {2009-10-06},
	journal = {The American Statistician},
	author = {Leemis, Lawrence M. and McQueston, Jacquelyn T.},
	month = feb,
	year = {2008},
	pages = {45--53}
}


% NOTE(review): no page range was exported for this entry, so the pages field is
% omitted; the ISSN is the one embedded in the DOI suffix (1069-1898) -- confirm.
@article{leemis_univariate_2012,
	title = {Univariate {Probability} {Distributions}},
	volume = {20},
	issn = {1069-1898},
	url = {https://doi.org/10.1080/10691898.2012.11889648},
	doi = {10.1080/10691898.2012.11889648},
	abstract = {We describe a web-based interactive graphic that can be used as a resource in introductory classes in mathematical statistics. This interactive graphic presents 76 common univariate distributions and gives details on (a) various features of the distribution such as the functional form of the probability density function and cumulative distribution function, graphs of the probability density function for various parameter settings, and values of population moments; (b) properties that the distribution possesses, for example, linear combinations of independent random variables from a particular distribution family also belong to the same distribution family; and (c) relationships between the various distributions, including special cases, transformations, limiting distributions, and Bayesian relationships. The interactive graphic went online on 11/30/12 at the URL www.math.wm.edu/ leemis/chart/UDR/UDR.html.},
	number = {3},
	urldate = {2018-09-04},
	journal = {Journal of Statistics Education},
	author = {Leemis, Lawrence M. and Luckett, Daniel J. and Powell, Austin G. and Vermeer, Peter E.},
	month = nov,
	year = {2012},
	keywords = {Continuous distributions, Discrete distributions, Distribution properties, Limiting distributions, Special Cases, Transformations, Univariate distributions}
}

@article{verbeke_effect_1997,
	author = {Verbeke, Geert and Lesaffre, Emmanuel},
	title = {The effect of misspecifying the random-effects distribution in linear mixed models for longitudinal data},
	journal = {Computational Statistics \& Data Analysis},
	year = {1997},
	month = feb,
	volume = {23},
	number = {4},
	pages = {541--556},
	issn = {0167-9473},
	doi = {10.1016/S0167-9473(96)00047-3},
	url = {http://www.sciencedirect.com/science/article/pii/S0167947396000473},
	urldate = {2013-11-13},
	keywords = {Fixed effects, Longitudinal models, Variance components, Misspecification, Random effects},
	abstract = {Maximum likelihood estimators for fixed effects and variance components in linear mixed models, obtained under the assumption of normally distributed random effects, are shown to be consistent and asymptotically normally distributed, even when the random-effects distribution is not normal. However, a sandwich-type correction to the inverse Fisher information matrix is then needed in order to get the correct asymptotic covariance matrix. Extensive simulations show that the so-obtained corrected standard errors are clearly superior to the naive uncorrected ones, especially for the parameters in the random-effects covariance matrix, even in moderate samples.}
}

@article{alonso_family_2008,
	title = {A family of tests to detect misspecifications in the random-effects structure of generalized linear mixed models},
	volume = {52},
	issn = {0167-9473},
	url = {http://linkinghub.elsevier.com/retrieve/pii/S0167947308001540},
	doi = {10.1016/j.csda.2008.02.033},
	number = {9},
	urldate = {2013-09-04},
	journal = {Computational Statistics \& Data Analysis},
	author = {Alonso, A. and Litière, S. and Molenberghs, G.},
	month = may,
	year = {2008},
	pages = {4474--4486}
}

% NOTE(review): first author entered with the compound surname "Alonso Abad"
% (same author trio as alonso_family_2008, where the surname is given as Alonso);
% confirm the preferred citation form. The citation key is left unchanged.
@article{abad_testing_2010,
	title = {Testing for misspecification in generalized linear mixed models},
	volume = {11},
	issn = {1468-4357},
	doi = {10.1093/biostatistics/kxq019},
	abstract = {Generalized linear mixed models have become a frequently used tool for the analysis of non-Gaussian longitudinal data. Estimation is often based on maximum likelihood theory, which assumes that the underlying probability model is correctly specified. Recent research shows that the results obtained from these models are not always robust against departures from the assumptions on which they are based. Therefore, diagnostic tools for the detection of model misspecifications are of the utmost importance. In this paper, we propose 2 diagnostic tests that are based on 2 equivalent representations of the model information matrix. We evaluate the power of both tests using theoretical considerations as well as via simulation. In the simulations, the performance of the new tools is evaluated in many settings of practical relevance, focusing on misspecification of the random-effects structure. In all the scenarios, the results were encouraging, however, the tests also exhibited inflated Type I error rates when the sample size was small or moderate. Importantly, a parametric bootstrap version of the tests seems to overcome this problem, although more research in this direction may be needed. Finally, both tests were also applied to analyze a real case study in psychiatry.},
	language = {en},
	number = {4},
	journal = {Biostatistics},
	author = {Alonso Abad, Ariel and Litière, Saskia and Molenberghs, Geert},
	month = oct,
	year = {2010},
	pmid = {20407039},
	keywords = {Linear Models, Risperidone, Longitudinal Studies, Algorithms, Computer Simulation, Antipsychotic Agents, Software, Humans, Schizophrenia, Likelihood Functions, Bias (Epidemiology), Randomized Controlled Trials as Topic, Treatment Outcome},
	pages = {771--786}
}

@article{huang_diagnosis_2009,
	author = {Huang, Xianzheng},
	title = {Diagnosis of random-effect model misspecification in generalized linear mixed models for binary response},
	journal = {Biometrics},
	year = {2009},
	month = jun,
	volume = {65},
	number = {2},
	pages = {361--368},
	issn = {1541-0420},
	doi = {10.1111/j.1541-0420.2008.01103.x},
	pmid = {18759837},
	url = {http://www.ncbi.nlm.nih.gov/pubmed/18759837},
	urldate = {2010-12-02},
	keywords = {Linear Models, Biometry, Algorithms, Computer Simulation, Pattern Recognition, Automated, Risk Assessment, Cluster Analysis, Epidemiologic Research Design, Reproducibility of Results, Sensitivity and Specificity, Data Interpretation, Statistical, Artifacts},
	abstract = {SUMMARY: Generalized linear mixed models (GLMMs) are widely used in the analysis of clustered data. However, the validity of likelihood-based inference in such analyses can be greatly affected by the assumed model for the random effects. We propose a diagnostic method for random-effect model misspecification in GLMMs for clustered binary response. We provide a theoretical justification of the proposed method and investigate its finite sample performance via simulation. The proposed method is applied to data from a longitudinal respiratory infection study.}
}


% The arXiv identifier is carried in eprint/archiveprefix/primaryclass; journal is
% kept only because article entries require one.
% NOTE(review): primaryclass stat.ME is inferred from the "Statistics - Methodology"
% keyword -- confirm against the arXiv listing.
@article{schad_how_2018,
	title = {How to capitalize on a priori contrasts in linear (mixed) models: {A} tutorial},
	shorttitle = {How to capitalize on a priori contrasts in linear (mixed) models},
	url = {http://arxiv.org/abs/1807.10451},
	abstract = {Factorial experiments in research on memory, language, and in other areas are often analyzed using analysis of variance (ANOVA). However, for experimental factors with more than two levels, the ANOVA omnibus F-test is not informative about the source of a main effect or interaction. This is unfortunate as researchers typically have specific hypotheses about which condition means differ from each other. A priori contrasts (i.e., comparisons planned before the sample means are known) between specific conditions or combinations of conditions are the appropriate way to represent such hypotheses in the statistical model. Many researchers have pointed out that contrasts should be "tested instead of, rather than as a supplement to, the ordinary `omnibus' F test" (Hayes, 1973, p. 601). In this tutorial, we explain the mathematics underlying different kinds of contrasts (i.e., treatment, sum, repeated, Helmert, and polynomial contrasts), discuss their properties, and demonstrate how they are applied in the R System for Statistical Computing (R Core Team, 2018). In this context, we explain the generalized inverse which is needed to compute the weight coefficients for contrasts that test hypotheses that are not covered by the default set of contrasts. A detailed understanding of contrast coding is crucial for successful and correct specification in linear models (including linear mixed models). Contrasts defined a priori yield far more precise confirmatory tests of experimental hypotheses than standard omnibus F-test.},
	urldate = {2018-09-10},
	journal = {arXiv:1807.10451 [stat]},
	eprint = {1807.10451},
	archiveprefix = {arXiv},
	primaryclass = {stat.ME},
	author = {Schad, Daniel J. and Hohenstein, Sven and Vasishth, Shravan and Kliegl, Reinhold},
	month = jul,
	year = {2018},
	keywords = {Statistics - Methodology}
}


@article{joe_accuracy_2008,
	author = {Joe, Harry},
	title = {Accuracy of {Laplace} approximation for discrete response mixed models},
	journal = {Computational Statistics \& Data Analysis},
	year = {2008},
	month = aug,
	volume = {52},
	number = {12},
	pages = {5066--5074},
	issn = {0167-9473},
	doi = {10.1016/j.csda.2008.05.002},
	url = {http://www.sciencedirect.com/science/article/pii/S0167947308002533},
	urldate = {2018-09-21},
	abstract = {The Laplace approximation is amongst the computational methods used for estimation in generalized linear mixed models. It is computationally the fastest, but there hasn’t been a clear analysis of when its accuracy is adequate. In this paper, for a few factors we do calculations for a variety of mixed models to show patterns in the asymptotic bias of the estimator based on the maximum of the Laplace approximation of the log-likelihood. The biggest factor for asymptotic bias is the amount of discreteness in the response variable; there is more bias for binary and ordinal responses than for a count response, and more bias for a count response when its support is mainly near 0. When there is bias, the bias decreases as the cluster size increases. Often, the Laplace approximation is adequate even for small cluster sizes. Even with bias, the Laplace approximation may be adequate for quick assessment of competing mixed models with different random effects and covariates.}
}


% First author's surname is the compound "García Ben"; the comma form keeps
% BibTeX from treating "García" as a given name. NOTE(review): confirm against
% the published byline. The citation key is left unchanged.
@article{ben_quantilequantile_2004,
	title = {Quantile-{Quantile} {Plot} for {Deviance} {Residuals} in the {Generalized} {Linear} {Model}},
	volume = {13},
	issn = {1061-8600},
	url = {http://pubs.amstat.org/doi/abs/10.1198/1061860042949_a},
	doi = {10.1198/1061860042949_a},
	number = {1},
	urldate = {2010-02-02},
	journal = {Journal of Computational and Graphical Statistics},
	author = {García Ben, Marta and Yohai, Víctor J.},
	month = mar,
	year = {2004},
	pages = {36--47}
}

@manual{hartig_dharma_2018,
	title = {{DHARMa}: Residual Diagnostics for Hierarchical (Multi-Level / Mixed) Regression Models},
	author = {Hartig, Florian},
	year = {2018},
	note = {R package version 0.2.0},
	url = {https://CRAN.R-project.org/package=DHARMa}
}


% Third author is entered as "le Cessie, S." (particle + surname), matching the
% spelling "le Cessie" used in this entry's own abstract.
@article{hosmer_comparison_1997,
	title = {A {Comparison} of {Goodness}-of-{Fit} {Tests} for the {Logistic} {Regression} {Model}},
	volume = {16},
	copyright = {Copyright © 1997 John Wiley \& Sons, Ltd.},
	issn = {1097-0258},
	url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/%28SICI%291097-0258%2819970515%2916%3A9%3C965%3A%3AAID-SIM509%3E3.0.CO%3B2-O},
	doi = {10.1002/(SICI)1097-0258(19970515)16:9<965::AID-SIM509>3.0.CO;2-O},
	abstract = {Recent work has shown that there may be disadvantages in the use of the chi-square-like goodness-of-fit tests for the logistic regression model proposed by Hosmer and Lemeshow that use fixed groups of the estimated probabilities. A particular concern with these grouping strategies based on estimated probabilities, fitted values, is that groups may contain subjects with widely different values of the covariates. It is possible to demonstrate situations where one set of fixed groups shows the model fits while the test rejects fit using a different set of fixed groups. We compare the performance by simulation of these tests to tests based on smoothed residuals proposed by le Cessie and Van Houwelingen and Royston, a score test for an extended logistic regression model proposed by Stukel, the Pearson chi-square and the unweighted residual sum-of- squares. These simulations demonstrate that all but one of Royston's tests have the correct size. An examination of the performance of the tests when the correct model has a quadratic term but a model containing only the linear term has been fit shows that the Pearson chi-square, the unweighted sum-of-squares, the Hosmer–Lemeshow decile of risk, the smoothed residual sum-of-squares and Stukel's score test, have power exceeding 50 per cent to detect moderate departures from linearity when the sample size is 100 and have power over 90 per cent for these same alternatives for samples of size 500. All tests had no power when the correct model had an interaction between a dichotomous and continuous covariate but only the continuous covariate model was fit. Power to detect an incorrectly specified link was poor for samples of size 100. For samples of size 500 Stukel's score test had the best power but it only exceeded 50 per cent to detect an asymmetric link function. The power of the unweighted sum-of-squares test to detect an incorrectly specified link function was slightly less than Stukel's score test. 
We illustrate the tests within the context of a model for factors associated with low birth weight. © 1997 by John Wiley \& Sons, Ltd. Stat. Med., Vol. 16, 965–980 (1997).},
	language = {en},
	number = {9},
	urldate = {2018-10-03},
	journal = {Statistics in Medicine},
	author = {Hosmer, D. W. and Hosmer, T. and le Cessie, S. and Lemeshow, S.},
	month = may,
	year = {1997},
	pages = {965--980}
}


@article{xu_assessment_2015,
	author = {Xu, Lizhen and Paterson, Andrew D. and Turpin, Williams and Xu, Wei},
	title = {Assessment and {Selection} of {Competing} {Models} for {Zero}-{Inflated} {Microbiome} {Data}},
	journal = {PLOS ONE},
	year = {2015},
	month = jul,
	volume = {10},
	number = {7},
	pages = {e0129606},
	issn = {1932-6203},
	doi = {10.1371/journal.pone.0129606},
	url = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0129606},
	urldate = {2018-10-29},
	language = {en},
	keywords = {Bacteria, Campylobacter, Mathematical functions, Microbiome, Probability distribution, Research errors, Simulation and modeling, Test statistics},
	abstract = {Typical data in a microbiome study consist of the operational taxonomic unit (OTU) counts that have the characteristic of excess zeros, which are often ignored by investigators. In this paper, we compare the performance of different competing methods to model data with zero inflated features through extensive simulations and application to a microbiome study. These methods include standard parametric and non-parametric models, hurdle models, and zero inflated models. We examine varying degrees of zero inflation, with or without dispersion in the count component, as well as different magnitude and direction of the covariate effect on structural zeros and the count components. We focus on the assessment of type I error, power to detect the overall covariate effect, measures of model fit, and bias and effectiveness of parameter estimations. We also evaluate the abilities of model selection strategies using Akaike information criterion (AIC) or Vuong test to identify the correct model. The simulation studies show that hurdle and zero inflated models have well controlled type I errors, higher power, better goodness of fit measures, and are more accurate and efficient in the parameter estimation. Besides that, the hurdle models have similar goodness of fit and parameter estimation for the count component as their corresponding zero inflated models. However, the estimation and interpretation of the parameters for the zero components differs, and hurdle models are more stable when structural zeros are absent. We then discuss the model selection strategy for zero inflated data and implement it in a gut microbiome study of {\textgreater} 400 independent subjects.}
}


@article{mcculloch_misspecifying_2011,
	author = {McCulloch, Charles E. and Neuhaus, John M.},
	title = {Misspecifying the {Shape} of a {Random} {Effects} {Distribution}: {Why} {Getting} {It} {Wrong} {May} {Not} {Matter}},
	shorttitle = {Misspecifying the {Shape} of a {Random} {Effects} {Distribution}},
	journal = {Statistical Science},
	year = {2011},
	month = aug,
	volume = {26},
	number = {3},
	pages = {388--402},
	issn = {0883-4237},
	doi = {10.1214/11-STS361},
	url = {http://projecteuclid.org/euclid.ss/1320066927},
	urldate = {2018-11-22},
	language = {en}
}


% edition holds only the edition number; the bibliography style supplies the word
% "edition"/"ed." itself. NOTE(review): if this file is only ever used with
% classic BibTeX styles, the ordinal word form (Second) reads better -- confirm
% the target toolchain.
@book{lee_generalized_2017,
	address = {Boca Raton, Florida},
	edition = {2},
	title = {Generalized {Linear} {Models} with {Random} {Effects}: {Unified} {Analysis} via {H}-likelihood, {Second} {Edition}},
	isbn = {978-1-4987-2061-8},
	shorttitle = {Generalized {Linear} {Models} with {Random} {Effects}},
	abstract = {This is the second edition of a monograph on generalized linear models with random effects that extends the classic work of McCullagh and Nelder. It has been thoroughly updated, with around 80 pages added, including new material on the extended likelihood approach that strengthens the theoretical basis of the methodology, new developments in variable selection and multiple testing, and new examples and applications. It includes an R package for all the methods and examples that supplement the book.},
	language = {English},
	publisher = {Chapman and Hall/CRC},
	author = {Lee, Youngjo and Nelder, John A. and Pawitan, Yudi},
	month = aug,
	year = {2017}
}


% NOTE(review): the doi field holds the article DOI in the usual APA
% journal/volume/issue/first-page pattern for this paper -- confirm that it resolves.
@article{smithson_better_2006,
	title = {A better lemon squeezer? {Maximum}-likelihood regression with beta-distributed dependent variables},
	volume = {11},
	issn = {1082-989X},
	shorttitle = {A better lemon squeezer?},
	doi = {10.1037/1082-989X.11.1.54},
	abstract = {Uncorrectable skew and heteroscedasticity are among the "lemons" of psychological data, yet many important variables naturally exhibit these properties. For scales with a lower and upper bound, a suitable candidate for models is the beta distribution, which is very flexible and models skew quite well. The authors present maximum-likelihood regression models assuming that the dependent variable is conditionally beta distributed rather than Gaussian. The approach models both means (location) and variances (dispersion) with their own distinct sets of predictors (continuous and/or categorical), thereby modeling heteroscedasticity. The location sub-model link function is the logit and thereby analogous to logistic regression, whereas the dispersion sub-model is log linear. Real examples show that these models handle the independent observations case readily. The article discusses comparisons between beta regression and alternative techniques, model selection and interpretation, practical estimation, and software.},
	number = {1},
	journal = {Psychological Methods},
	author = {Smithson, Michael and Verkuilen, Jay},
	month = mar,
	year = {2006},
	pmid = {16594767},
	keywords = {Linear Models, Analysis of Variance, Least-Squares Analysis, Models, Statistical, Regression Analysis, Humans, Reproducibility of Results, Normal Distribution, Likelihood Functions, Child, Data Interpretation, Statistical, Bias (Epidemiology), Dyslexia},
	pages = {54--71}
}


@techreport{cribari-neto_beta_2009,
	author = {Cribari-Neto, Francisco and Zeileis, Achim},
	title = {Beta {Regression} in {R}},
	institution = {WU Vienna University of Economics and Business},
	address = {Vienna, Austria},
	number = {98},
	year = {2009},
	language = {en},
	url = {https://cran.r-project.org/web/packages/betareg/index.html},
	urldate = {2019-01-05},
	abstract = {The class of beta regression models is commonly used by practitioners to model variables that assume values in the standard unit interval (0, 1). It is based on the assumption that the dependent variable is beta-distributed and that its mean is related to a set of regressors through a linear predictor with unknown coefficients and a link function. The model also includes a precision parameter which may be constant or depend on a (potentially different) set of regressors through a link function as well. This approach naturally incorporates features such as heteroskedasticity or skewness which are commonly observed in data taking values in the standard unit interval, such as rates or proportions. This paper describes the betareg package which provides the class of beta regressions in the R system for statistical computing. The underlying theory is briefly outlined, the implementation discussed and illustrated in various replication exercises.}
}


@article{heisterkamp_update_2017,
	author = {Heisterkamp, Simon H. and van Willigen, Engelbertus and Diderichsen, Paul-Matthias and Maringwa, John},
	title = {Update of the nlme {Package} to {Allow} a {Fixed} {Standard} {Deviation} of the {Residual} {Error}},
	journal = {The R Journal},
	volume = {9},
	number = {1},
	pages = {239--251},
	year = {2017}
}


@article{dezeure_high-dimensional_2015,
	title = {High-{Dimensional} {Inference}: {Confidence} {Intervals}, {$p$}-{Values} and {R}-{Software} hdi},
	volume = {30},
	issn = {0883-4237, 2168-8745},
	shorttitle = {High-{Dimensional} {Inference}},
	url = {https://projecteuclid.org/euclid.ss/1449670857},
	doi = {10.1214/15-STS527},
	abstract = {We present a (selective) review of recent frequentist high-dimensional inference methods for constructing $p$-values and confidence intervals in linear and generalized linear models. We include a broad, comparative empirical study which complements the viewpoint from statistical methodology and theory. Furthermore, we introduce and illustrate the R-package hdi which easily allows the use of different methods and supports reproducibility.},
	language = {EN},
	number = {4},
	urldate = {2019-01-09},
	journal = {Statistical Science},
	author = {Dezeure, Ruben and Bühlmann, Peter and Meier, Lukas and Meinshausen, Nicolai},
	month = nov,
	year = {2015},
	mrnumber = {MR3432840},
	zmnumber = {06946201},
	keywords = {\$p\$-value, Clustering, confidence interval, generalized linear model, high-dimensional statistical inference, linear model, multiple testing, R-software},
	pages = {533--558}
}

@book{james_introduction_2013,
	address = {New York, NY},
	series = {Springer {Texts} in {Statistics}},
	title = {An {Introduction} to {Statistical} {Learning}: {With} {Applications} in {R}},
	volume = {103},
	isbn = {978-1-4614-7137-0},
	shorttitle = {An {Introduction} to {Statistical} {Learning}},
	doi = {10.1007/978-1-4614-7138-7},
	publisher = {Springer},
	author = {James, Gareth and Witten, Daniela and Hastie, Trevor and Tibshirani, Robert},
	year = {2013}
}


@article{lynch_dealing_2014,
	title = {Dealing with under- and over-dispersed count data in life history, spatial, and community ecology},
	volume = {95},
	issn = {0012-9658},
	url = {https://www.jstor.org/stable/43495231},
	doi = {10.1890/13-1912.1},
	abstract = {Count data arise frequently in ecological analyses, but regularly violate the equi-dispersion constraint imposed by the most popular distribution for analyzing these data, the Poisson distribution. Several approaches for addressing over-dispersion have been developed (e.g., negative binomial distribution), but methods for including both under-dispersion and over-dispersion have been largely overlooked. We provide three specific examples drawn from life-history theory, spatial ecology, and community ecology, and illustrate the use of the Conway-Maxwell-Poisson (CMP) distribution as compared to other common models for count data. We find that where equi-dispersion is violated, the CMP distribution performs significantly better than the Poisson distribution, as assessed by information criteria that account for the CMP's additional distribution parameter. The Conway-Maxwell-Poisson distribution has seen rapid development in other fields such as risk analysis and linguistics, but is relatively unknown in the ecological literature. In addition to providing a more flexible exponential distribution for count data that is easily integrated into generalized linear models, the CMP allows ecologists to focus on the magnitude of under-or over-dispersion as opposed to the simple rejection of the equi-dispersion null hypothesis. By demonstrating its suitability in a variety of common ecological applications, we hope to encourage its wider adoption as a flexible alternative to the Poisson.},
	number = {11},
	urldate = {2019-01-27},
	journal = {Ecology},
	author = {Lynch, Heather J. and Thorson, James T. and Shelton, Andrew Olaf},
	year = {2014},
	pages = {3173--3180}
}


@article{vats_revisiting_2018,
	title = {Revisiting the {Gelman}-{Rubin} {Diagnostic}},
	url = {http://arxiv.org/abs/1812.09384},
	eprint = {1812.09384},
	archiveprefix = {arXiv},
	abstract = {Gelman and Rubin's (1992) convergence diagnostic is one of the most popular methods for terminating a Markov chain Monte Carlo (MCMC) sampler. Since the seminal paper, researchers have developed sophisticated methods of variance estimation for Monte Carlo averages. We show that this class of estimators find immediate use in the Gelman-Rubin statistic, a connection not established in the literature before. We incorporate these estimators to upgrade both the univariate and multivariate Gelman-Rubin statistics, leading to increased stability in MCMC termination time. An immediate advantage is that our new Gelman-Rubin statistic can be calculated for a single chain. In addition, we establish a relationship between the Gelman-Rubin statistic and effective sample size. Leveraging this relationship, we develop a principled cutoff criterion for the Gelman-Rubin statistic. Finally, we demonstrate the utility of our improved diagnostic via examples.},
	urldate = {2019-01-29},
	journal = {arXiv:1812.09384 [stat]},
	author = {Vats, Dootika and Knudson, Christina},
	month = dec,
	year = {2018},
	keywords = {Statistics - Computation, Statistics - Methodology}
}


@article{nakagawa_coefficient_2017,
	title = {The coefficient of determination {$R^2$} and intra-class correlation coefficient from generalized linear mixed-effects models revisited and expanded},
	volume = {14},
	url = {https://royalsocietypublishing.org/doi/full/10.1098/rsif.2017.0213},
	doi = {10.1098/rsif.2017.0213},
	abstract = {The coefficient of determination $R^2$ quantifies the proportion of variance explained by a statistical model and is an important summary statistic of biological interest. However, estimating $R^2$ for generalized linear mixed models (GLMMs) remains challenging. We have previously introduced a version of $R^2$ that we called $R^2_{\mathrm{GLMM}}$ for Poisson and binomial GLMMs, but not for other distributional families. Similarly, we earlier discussed how to estimate intra-class correlation coefficients (ICCs) using Poisson and binomial GLMMs. In this paper, we generalize our methods to all other non-Gaussian distributions, in particular to negative binomial and gamma distributions that are commonly used for modelling biological data. While expanding our approach, we highlight two useful concepts for biologists, Jensen's inequality and the delta method, both of which help us in understanding the properties of GLMMs. Jensen's inequality has important implications for biologically meaningful interpretation of GLMMs, whereas the delta method allows a general derivation of variance associated with non-Gaussian distributions. We also discuss some special considerations for binomial GLMMs with binary or proportion data. We illustrate the implementation of our extension by worked examples from the field of ecology and evolution in the R environment. However, our method can be used across disciplines and regardless of statistical environments.},
	number = {134},
	urldate = {2019-01-29},
	journal = {Journal of The Royal Society Interface},
	author = {Nakagawa, Shinichi and Johnson, Paul C. D. and Schielzeth, Holger},
	month = sep,
	year = {2017},
	pages = {20170213}
}

@article{hedeker_note_2018,
	title = {A note on marginalization of regression parameters from mixed models of binary outcomes},
	volume = {74},
	copyright = {© 2017, The International Biometric Society},
	issn = {1541-0420},
	url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/biom.12707},
	doi = {10.1111/biom.12707},
	abstract = {This article discusses marginalization of the regression parameters in mixed models for correlated binary outcomes. As is well known, the regression parameters in such models have the “subject-specific” (SS) or conditional interpretation, in contrast to the “population-averaged” (PA) or marginal estimates that represent the unconditional covariate effects. We describe an approach using numerical quadrature to obtain PA estimates from their SS counterparts in models with multiple random effects. Standard errors for the PA estimates are derived using the delta method. We illustrate our proposed method using data from a smoking cessation study in which a binary outcome (smoking, Y/N) was measured longitudinally. We compare our estimates to those obtained using GEE and marginalized multilevel models, and present results from a simulation study.},
	language = {en},
	number = {1},
	urldate = {2019-04-25},
	journal = {Biometrics},
	author = {Hedeker, Donald and du Toit, Stephen H. C. and Demirtas, Hakan and Gibbons, Robert D.},
	year = {2018},
	keywords = {Clustered data, Longitudinal data, Multilevel models, Population-averaged estimates, Subject-specific estimates},
	pages = {354--361}
}


@book{madsen_introduction_2011,
	title = {Introduction to {General} and {Generalized} {Linear} {Models}},
	isbn = {978-1-4200-9155-7},
	abstract = {Bridging the gap between theory and practice for modern statistical model building, Introduction to General and Generalized Linear Models presents likelihood-based techniques for statistical modelling using various types of data. Implementations using R are provided throughout the text, although other software packages are also discussed. Numerous examples show how the problems are solved with R. After describing the necessary likelihood theory, the book covers both general and generalized linear models using the same likelihood-based methods. It presents the corresponding/parallel results for the general linear models first, since they are easier to understand and often more well known. The authors then explore random effects and mixed effects in a Gaussian context. They also introduce non-Gaussian hierarchical models that are members of the exponential family of distributions. Each chapter contains examples and guidelines for solving the problems via R. Providing a flexible framework for data analysis and model building, this text focuses on the statistical methods and models that can help predict the expected value of an outcome, dependent, or response variable. It offers a sound introduction to general and generalized linear models using the popular and powerful likelihood techniques. Ancillary materials are available at \verb!www.imm.dtu.dk/~hm/GLM!.},
	language = {en},
	publisher = {CRC Press},
	author = {Madsen, Henrik and Thyregod, Poul},
	year = {2011},
	keywords = {Mathematics / Probability \& Statistics / General, Business \& Economics / Statistics}
}


@book{hodges_richly_2016,
	title = {Richly Parameterized Linear Models: Additive, Time Series, and Spatial Models Using Random Effects},
	shorttitle = {Richly Parameterized Linear Models},
	abstract = {A First Step toward a Unified Theory of Richly Parameterized Linear Models. Using mixed linear models to analyze data often leads to results that are mysterious, inconvenient, or wrong. Further compounding the problem, statisticians lack a cohesive resource to acquire a systematic, theory-based understanding of models with random effects. Richly Parameterized Linear Models: Additive, Time Series, and Spatial Models Using Random Effects takes a first step in developing a full theory of richly parameterized models, which would allow statisticians to better understand their analysis results. The author examines what is known and unknown about mixed linear models and identifies research opportunities. The first two parts of the book cover an existing syntax for unifying models with random effects. The text explains how richly parameterized models can be expressed as mixed linear models and analyzed using conventional and Bayesian methods. In the last two parts, the author discusses oddities that can arise when analyzing data using these models. He presents ways to detect problems and, when possible, shows how to mitigate or avoid them. The book adapts ideas from linear model theory and then goes beyond that theory by examining the information in the data about the mixed linear model’s covariance matrices. Each chapter ends with two sets of exercises. Conventional problems encourage readers to practice with the algebraic methods and open questions motivate readers to research further. Supporting materials, including datasets for most of the examples analyzed, are available on the author’s website.},
	language = {English},
	publisher = {Chapman and Hall/CRC},
	author = {Hodges, James S.},
	month = apr,
	year = {2016}
}


@article{lee_analysis_2020,
	author = {Lee, Jarod Y. L. and Green, Peter J. and Ryan, Louise M.},
	title = {Analysis of grouped data using conjugate generalized linear mixed models},
	journal = {Biometrika},
	volume = {107},
	number = {1},
	pages = {231--237},
	month = mar,
	year = {2020},
	issn = {0006-3444},
	doi = {10.1093/biomet/asz053},
	url = {https://academic.oup.com/biomet/article/107/1/231/5607066},
	urldate = {2020-10-02},
	language = {en},
	note = {Publisher: Oxford Academic},
	abstract = {Summary.  This article concerns a class of generalized linear mixed models for two-level grouped data, where the random effects are uniquely indexed by groups a}
}

@article{lee_poisson_2017,
	title = {On the ``{Poisson} {Trick}'' and its {Extensions} for {Fitting} {Multinomial} {Regression} {Models}},
	url = {http://arxiv.org/abs/1707.08538},
	eprint = {1707.08538},
	archiveprefix = {arXiv},
	abstract = {This article is concerned with the fitting of multinomial regression models using the so-called "Poisson Trick". The work is motivated by Chen \& Kuo (2001) and Malchow-M{\o}ller \& Svarer (2003) which have been criticized for being computationally inefficient and sometimes producing nonsense results. We first discuss the case of independent data and offer a parsimonious fitting strategy when all covariates are categorical. We then propose a new approach for modelling correlated responses based on an extension of the Gamma-Poisson model, where the likelihood can be expressed in closed-form. The parameters are estimated via an Expectation/Conditional Maximization (ECM) algorithm, which can be implemented using functions for fitting generalized linear models readily available in standard statistical software packages. Compared to existing methods, our approach avoids the need to approximate the intractable integrals and thus the inference is exact with respect to the approximating Gamma-Poisson model. The proposed method is illustrated via a reanalysis of the yogurt data discussed by Chen \& Kuo (2001).},
	urldate = {2020-10-02},
	journal = {arXiv:1707.08538 [stat]},
	author = {Lee, Jarod Y. L. and Green, Peter J. and Ryan, Louise M.},
	month = jul,
	year = {2017},
	keywords = {Statistics - Applications, Statistics - Computation, Statistics - Methodology}
}


@book{barr_learning_2020,
	author = {Barr, Dale J.},
	title = {Learning {Statistical} {Models} {Through} {Simulation} in {R}},
	series = {PsyTeachR books},
	year = {2020},
	url = {https://psyteachr.github.io/ug3-stats/},
	urldate = {2021-01-08},
	abstract = {Textbook on statistical models for social scientists.}
}


@article{jaeger_r2_2017,
	title = {An {$R^2$} statistic for fixed effects in the generalized linear mixed model},
	volume = {44},
	issn = {0266-4763},
	url = {https://doi.org/10.1080/02664763.2016.1193725},
	doi = {10.1080/02664763.2016.1193725},
	abstract = {Measuring the proportion of variance explained (R2) by a statistical model and the relative importance of specific predictors (semi-partial R2) can be essential considerations when building a parsimonious statistical model. The R2 statistic is a familiar summary of goodness-of-fit for normal linear models and has been extended in various ways to more general models. In particular, the generalized linear mixed model (GLMM) extends the normal linear model and is used to analyze correlated (hierarchical), non-normal data structures. Although various R2 statistics have been proposed, there is no consensus in statistical literature for the most sensible definition of R2 in this context. This research aims to build upon existing knowledge and definitions of R2 and to concisely define the statistic for the GLMM. Here, we derive a model and semi-partial R2 statistic for fixed (population) effects in the GLMM by utilizing the penalized quasi-likelihood estimation method based on linearization. We show that our proposed R2 statistic generalizes the widely used marginal R2 statistic introduced by Nakagawa and Schielzeth, demonstrate our statistics capability in model selection, show the utility of semi-partial R2 statistics in longitudinal data analysis, and provide software that computes the proposed R2 statistic along with semi-partial R2 for individual fixed effects. The software provided is adapted for both SAS and R programming languages.},
	number = {6},
	urldate = {2020-03-11},
	journal = {Journal of Applied Statistics},
	author = {Jaeger, Byron C. and Edwards, Lloyd J. and Das, Kalyan and Sen, Pranab K.},
	month = apr,
	year = {2017},
	keywords = {generalized linear mixed model, R-squared, Blood pressure, 62-07, 62Fxx, 62Hxx, 62Pxx, clustered data, statistical software},
	pages = {1086--1105}
}


@article{oberpriller_fixed_2021,
	title = {Fixed or random? {On} the reliability of mixed-effects models for a small number of levels in grouping variables},
	copyright = {© 2021, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/},
	shorttitle = {Fixed or random?},
	url = {https://www.biorxiv.org/content/10.1101/2021.05.03.442487v2},
	doi = {10.1101/2021.05.03.442487},
	abstract = {Biological data are often intrinsically hierarchical. Due to their ability to account for such dependencies, mixed-effects models have become a common analysis technique in ecology and evolution. While many questions around their theoretical foundations and practical applications are solved, one fundamental question is still highly debated: When having a low number of levels should we model a grouping variable as a random or fixed effect? In such situation, the variance of the random effect is presumably underestimated, but whether this affects the statistical properties of the ecological effects (fixed effects) is unclear. Here, we analyzed the consequences of including a grouping variable as fixed or random effect and possible other modeling options (too simple or too complex models) for data with small number of levels in the grouping variable (2 - 8). For all models, we calculated type I error rates, power and coverage. Moreover, we show how these statistical properties change with the study design. We found that mixed-effects models already correctly estimate the variance of a random effect with only two groups. Moreover, model choice does not influence the statistical properties of the ecological effect when there is no random slope in the data-generating process. However, if an ecological effect differs among groups, using a random slope and intercept model, and switching to a fixed-effect model only in case of a singular fit, avoids overconfidence in the results. Additionally, power and type I error are strongly influenced by the number of and difference between groups. We conclude that inferring the correct random effect structure is of high importance to get correct statistical properties. When in doubt, we recommend starting with the simpler model and using model diagnostics to identify missing components. When having identified the correct structure, we encourage to start with a mixed-effects model independent of the number of groups and switch to a fixed-effect model only in case of a singular fit. With these recommendations, we allow for more informative choices about study design and data analysis and thus make ecological inference with mixed-effects models more robust for small number of groups.},
	language = {en},
	urldate = {2021-06-28},
	journal = {bioRxiv},
	author = {Oberpriller, Johannes and Leite, Melina de Souza and Pichler, Maximilian},
	month = jun,
	year = {2021},
	note = {Publisher: Cold Spring Harbor Laboratory
Section: New Results},
	pages = {2021.05.03.442487}
}


@article{arnqvist_mixed_2020,
	title = {Mixed {Models} {Offer} {No} {Freedom} from {Degrees} of {Freedom}},
	volume = {35},
	issn = {0169-5347},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0169534719303465},
	doi = {10.1016/j.tree.2019.12.004},
	language = {en},
	number = {4},
	urldate = {2021-06-30},
	journal = {Trends in Ecology \& Evolution},
	author = {Arnqvist, Göran},
	month = apr,
	year = {2020},
	pages = {329--335}
}