@book{abu-mostafaLearningData2012a,
title = {Learning {{From Data}}},
author = {Abu-Mostafa, Yaser S. and Magdon-Ismail, Malik and Lin, Hsuan-Tien},
date = {2012-01-01},
publisher = {{AMLBook}},
location = {{S.l.}},
isbn = {978-1-60049-006-4},
langid = {english},
pagetotal = {213}
}
@inproceedings{akibaOptunaNextgenerationHyperparameter2019,
title = {Optuna: {{A Next-generation Hyperparameter Optimization Framework}}},
shorttitle = {Optuna},
booktitle = {Proceedings of the 25th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}},
author = {Akiba, Takuya and Sano, Shotaro and Yanase, Toshihiko and Ohta, Takeru and Koyama, Masanori},
date = {2019-07-25},
series = {{{KDD}} '19},
pages = {2623--2631},
publisher = {{Association for Computing Machinery}},
location = {{New York, NY, USA}},
doi = {10.1145/3292500.3330701},
url = {https://doi.org/10.1145/3292500.3330701},
urldate = {2022-11-16},
abstract = {The purpose of this study is to introduce new design-criteria for next-generation hyperparameter optimization software. The criteria we propose include (1) define-by-run API that allows users to construct the parameter search space dynamically, (2) efficient implementation of both searching and pruning strategies, and (3) easy-to-setup, versatile architecture that can be deployed for various purposes, ranging from scalable distributed computing to light-weight experiment conducted via interactive interface. In order to prove our point, we will introduce Optuna, an optimization software which is a culmination of our effort in the development of a next generation optimization software. As an optimization software designed with define-by-run principle, Optuna is particularly the first of its kind. We will present the design-techniques that became necessary in the development of the software that meets the above criteria, and demonstrate the power of our new design through experimental results and real world applications. Our software is available under the MIT license (https://github.com/pfnet/optuna/).},
isbn = {978-1-4503-6201-6},
keywords = {Bayesian optimization,black-box optimization,hyperparameter optimization,machine learning system},
file = {/Users/michelsen/Zotero/storage/94LWRTM9/akiba2019.pdf.pdf;/Users/michelsen/Zotero/storage/Q5BCLKT4/Akiba et al. - 2019 - Optuna A Next-generation Hyperparameter Optimizat.pdf}
}
@article{al-asadiInferenceVisualizationDNA,
title = {Inference and Visualization of {{DNA}} Damage Patterns Using a Grade of Membership Model},
author = {Al-Asadi, Hussein and Dey, Kushal K and Novembre, John and Stephens, Matthew},
pages = {7},
abstract = {Motivation: Quality control plays a major role in the analysis of ancient DNA (aDNA). One key step in this quality control is assessment of DNA damage: aDNA contains unique signatures of DNA damage that distinguish it from modern DNA, and so analyses of damage patterns can help confirm that DNA sequences obtained are from endogenous aDNA rather than from modern contamination. Predominant signatures of DNA damage include a high frequency of cytosine to thymine substitutions (C-to-T) at the ends of fragments, and elevated rates of purines (A \& G) before the 5' strand-breaks. Existing QC procedures help assess damage by simply plotting for each sample, the C-to-T mismatch rate along the read and the composition of bases before the 5' strand-breaks. Here we present a more flexible and comprehensive model-based approach to infer and visualize damage patterns in aDNA, implemented in an R package aRchaic. This approach is based on a ‘grade of membership’ model (also known as ‘admixture’ or ‘topic’ model) in which each sample has an estimated grade of membership in each of K damage profiles that are estimated from the data.},
langid = {english},
keywords = {adna,ancient dna,damage,damage pattern,mismatch},
file = {/Users/michelsen/Zotero/storage/UXRKCH8R/Al-Asadi et al. - Inference and visualization of DNA damage patterns.pdf}
}
@thesis{al-nakeebMachineLearningTools2017,
type = {phdthesis},
title = {Machine {{Learning Tools}} for {{DNA Sequence Analysis}}},
author = {Al-Nakeeb, Kosai},
date = {2017-12},
institution = {{Technical University of Denmark}},
location = {{Department of Bio and Health Informatics}}
}
@misc{anaemiasNutritionalAnaemiasReport1968,
title = {Nutritional Anaemias: Report of a {{WHO}} Scientific Group [Meeting Held in {{Geneva}} from 13 to 17 {{March}} 1967]},
author = {{WHO Scientific Group on Nutritional Anaemias} and {World Health Organization}},
date = {1968},
series = {World {{Health Organization}} Technical Report Series ; No. 405},
note = {Russian version of nos. 404-438 bound together (barcode no. 00073779)},
publisher = {{World Health Organization}}
}
@article{andersonTrackingCellSurface1992,
title = {Tracking of Cell Surface Receptors by Fluorescence Digital Imaging Microscopy Using a Charge-Coupled Device Camera. {{Low-density}} Lipoprotein and Influenza Virus Receptor Mobility at 4 Degrees {{C}}},
author = {Anderson, C.M. and Georgiou, G.N. and Morrison, I.E. and Stevenson, G.V. and Cherry, R.J.},
date = {1992-02-01},
journaltitle = {Journal of Cell Science},
shortjournal = {Journal of Cell Science},
volume = {101},
number = {2},
pages = {415--425},
issn = {0021-9533},
doi = {10.1242/jcs.101.2.415},
url = {https://doi.org/10.1242/jcs.101.2.415},
urldate = {2022-11-18},
abstract = {A fluorescence imaging system, based on using a cooled slow-scan CCD camera, has been developed for tracking receptors on the surfaces of living cells. The technique is applicable to receptors for particles such as lipoproteins and viruses that can be labeled with a few tens of fluorophores. The positions of single particles in each image are determined to within 25 nm by fitting the fluorescence distribution to a two-dimensional Gaussian function. This procedure also provides an accurate measure of intensity, which is used as a tag for automated tracking of particles from frame to frame. The method is applied to an investigation of the mobility of receptors for LDL and influenza virus particles on human dermal fibroblasts at 4 degrees C. In contrast to previous studies by FRAP (fluorescence recovery after photo-bleaching), it is found that receptors have a low but measurable mobility at 4 degrees C. Analysis of individual particle tracks indicates that whilst some receptors undergo random diffusion, others undergo directed motion (flow) or diffusion restricted to a domain. A procedure is proposed for subdividing receptors according to their different types of motion and hence determining their motional parameters. The finding that receptors are not completely immobilised at 4 degrees C is significant for studies of receptor distributions performed at this temperature.},
file = {/Users/michelsen/Zotero/storage/XGPKUI2R/Anderson et al. - 1992 - Tracking of cell surface receptors by fluorescence.pdf;/Users/michelsen/Zotero/storage/SM8HT9EX/Tracking-of-cell-surface-receptors-by-fluorescence.html}
}
@article{bagerRiskHospitalisationAssociated2021,
title = {Risk of Hospitalisation Associated with Infection with {{SARS-CoV-2}} Lineage {{B}}.1.1.7 in {{Denmark}}: An Observational Cohort Study},
shorttitle = {Risk of Hospitalisation Associated with Infection with {{SARS-CoV-2}} Lineage {{B}}.1.1.7 in {{Denmark}}},
author = {Bager, Peter and Wohlfahrt, Jan and Fonager, Jannik and Rasmussen, Morten and Albertsen, Mads and Michaelsen, Thomas Yssing and Møller, Camilla Holten and Ethelberg, Steen and Legarth, Rebecca and Button, Mia Sarah Fischer and Gubbels, Sophie and Voldstedlund, Marianne and Mølbak, Kåre and Skov, Robert Leo and Fomsgaard, Anders and Krause, Tyra Grove},
date = {2021-11-01},
journaltitle = {The Lancet Infectious Diseases},
shortjournal = {The Lancet Infectious Diseases},
volume = {21},
number = {11},
pages = {1507--1517},
issn = {1473-3099},
doi = {10.1016/S1473-3099(21)00290-5},
abstract = {Background The more infectious SARS-CoV-2 lineage B.1.1.7 rapidly spread in Europe after December, 2020, and a concern that B.1.1.7 could cause more severe disease has been raised. Taking advantage of Denmark's high RT-PCR testing and whole genome sequencing capacities, we used national health register data to assess the risk of COVID-19 hospitalisation in individuals infected with B.1.1.7 compared with those with other SARS-CoV-2 lineages. Methods We did an observational cohort study of all SARS-CoV-2-positive cases confirmed by RT-PCR in Denmark, sampled between Jan 1 and March 24, 2021, with 14 days of follow-up for COVID-19 hospitalisation. Cases were identified in the national COVID-19 surveillance system database, which includes data from the Danish Microbiology Database (RT-PCR test results), the Danish COVID-19 Genome Consortium, the National Patient Registry, the Civil Registration System, as well as other nationwide registers. Among all cases, COVID-19 hospitalisation was defined as first admission lasting longer than 12 h within 14 days of a sample with a positive RT-PCR result. The study population and main analysis were restricted to the proportion of cases with viral genome data. We calculated the risk ratio (RR) of admission according to infection with B.1.1.7 versus other co-existing lineages with a Poisson regression model with robust SEs, adjusted a priori for sex, age, calendar time, region, and comorbidities. The contribution of each covariate to confounding of the crude RR was evaluated afterwards by a stepwise forward inclusion. Findings Between Jan 1 and March 24, 2021, 50\,958 individuals with a positive SARS-CoV-2 test and at least 14 days of follow-up for hospitalisation were identified; 30\,572 (60·0\%) had genome data, of whom 10\,544 (34·5\%) were infected with B.1.1.7. 1944 (6·4\%) individuals had a COVID-19 hospitalisation and of these, 571 (29·4\%) had a B.1.1.7 infection and 1373 (70·6\%) had an infection with other SARS-CoV-2 lineages. Although the overall number of hospitalisations decreased during the study period, the proportion of individuals infected with B.1.1.7 increased from 3·5\% to 92·1\% per week. B.1.1.7 was associated with a crude RR of hospital admission of 0·79 (95\% CI 0·72–0·87; p{$<$}0·0001) and an adjusted RR of 1·42 (95\% CI 1·25–1·60; p{$<$}0·0001). The adjusted RR was increased in all strata of age and calendar period—the two covariates with the largest contribution to confounding of the crude RR. Interpretation Infection with SARS-CoV-2 lineage B.1.1.7 was associated with an increased risk of hospitalisation compared with that of other lineages in an analysis adjusted for covariates. The overall effect on hospitalisations in Denmark was lessened due to a strict lockdown, but our findings could support hospital preparedness and modelling of the projected impact of the epidemic in countries with uncontrolled spread of B.1.1.7. Funding None.},
langid = {english},
keywords = {corona,covid},
file = {/Users/michelsen/Zotero/storage/5LV28UGM/bager2021.pdf.pdf;/Users/michelsen/Zotero/storage/6XZGIJYZ/Bager et al. - 2021 - Risk of hospitalisation associated with infection .pdf;/Users/michelsen/Zotero/storage/TCG623TX/S1473309921002905.html}
}
@thesis{bakerInferenceDiffusionCoefficients2021,
type = {phdthesis},
title = {Inference of {{Diffusion Coefficients}} from {{Single Particle Trajectories}}},
author = {Baker, Lewis R.},
date = {2021},
institution = {{University of Colorado, Boulder}},
url = {https://scholar.colorado.edu/concern/graduate_thesis_or_dissertations/bc386k398},
abstract = {Systems driven by Brownian motion are ubiquitous. A prevailing challenge is inferring, from data, the parameters that describe these stochastic processes. In this work, we investigate a switch diffusion process that arises in the context of single particle tracking (SPT), wherein the motion of a particle is governed by a discrete set of diffusive states, and the tendency of the particle to switch between these states is modeled as a random process. From such experiments, it is desirable to identify the number of underlying diffusive states, quantitate each state through its diffusion coefficient, and characterize the nature of transitions between different states.We present two models to describe this phenomenon: a mixture model and a hidden Markov model (HMM). For both, we adopt a Bayesian approach to sample the distributions of the underlying parameters, implementing a Markov Chain Monte Carlo (MCMC) scheme to estimate the posterior distributions of the model parameters, and leverage our analysis to investigate the problem of determining the number of diffusive states using model selection criteria. We explore the robustness of the inference scheme by testing it on an ensemble of simulated trajectories.Finally, we present a detailed analysis of a collection of experimentally measured trajectories of phosphoinositide-dependent kinase-1 (PDK-1) on a synthetic membrane with concentrations of protein kinase C (PKC) near the K1/2, obtained using total internal reflection fluorescence (TIRF) microscopy. In this particular experimental setup, PKC-1 is believed to exhibit three distinct modes of diffusion. Our analysis finds a two-state model to be a more parsimonious fit of the data, illustrating a significant challenge to parameter inference. Nevertheless, our analysis permits quantitation of kinetic rate constants which, to our knowledge, have not been measured for this system from single particle tracking experiments.},
langid = {english},
pagetotal = {71},
keywords = {Applied mathematics,Biochemistry,diffusion,hidden markov model,inference,Molecular biology},
file = {/Users/michelsen/Zotero/storage/BASXG3GD/bc386k398.html}
}
@book{barlowStatisticsGuideUse1993,
title = {Statistics: {{A Guide}} to the {{Use}} of {{Statistical Methods}} in the {{Physical Sciences}}},
shorttitle = {Statistics},
author = {Barlow, R. J.},
date = {1993-11-01},
publisher = {{Wiley}},
location = {{Chichester, England ; New York}},
isbn = {978-0-471-92295-7},
langid = {english},
pagetotal = {222}
}
@inproceedings{bergstraAlgorithmsHyperParameterOptimization2011,
title = {Algorithms for {{Hyper-Parameter Optimization}}},
booktitle = {Advances in {{Neural Information Processing Systems}}},
author = {Bergstra, James and Bardenet, Rémi and Bengio, Yoshua and Kégl, Balázs},
date = {2011},
volume = {24},
publisher = {{Curran Associates, Inc.}},
url = {https://papers.nips.cc/paper/2011/hash/86e8f7ab32cfd12577bc2619bc635690-Abstract.html},
urldate = {2022-11-16},
abstract = {Several recent advances to the state of the art in image classification benchmarks have come from better configurations of existing techniques rather than novel approaches to feature learning. Traditionally, hyper-parameter optimization has been the job of humans because they can be very efficient in regimes where only a few trials are possible. Presently, computer clusters and GPU processors make it possible to run more trials and we show that algorithmic approaches can find better results. We present hyper-parameter optimization results on tasks of training neural networks and deep belief networks (DBNs). We optimize hyper-parameters using random search and two new greedy sequential methods based on the expected improvement criterion. Random search has been shown to be sufficiently efficient for learning neural networks for several datasets, but we show it is unreliable for training DBNs. The sequential algorithms are applied to the most difficult DBN learning problems from [Larochelle et al., 2007] and find significantly better results than the best previously reported. This work contributes novel techniques for making response surface models P (y|x) in which many elements of hyper-parameter assignment (x) are known to be irrelevant given particular values of other elements.},
file = {/Users/michelsen/Zotero/storage/RAC8JZWL/Bergstra et al. - 2011 - Algorithms for Hyper-Parameter Optimization.pdf}
}
@article{bergstraRandomSearchHyperparameter2012a,
title = {Random Search for Hyper-Parameter Optimization},
author = {Bergstra, James and Bengio, Yoshua},
date = {2012},
journaltitle = {Journal of Machine Learning Research},
volume = {13},
number = {10},
pages = {281--305},
url = {http://jmlr.org/papers/v13/bergstra12a.html}
}
@unpublished{betancourtConceptualIntroductionHamiltonian2018,
title = {A {{Conceptual Introduction}} to {{Hamiltonian Monte Carlo}}},
author = {Betancourt, Michael},
date = {2018-07-15},
eprint = {1701.02434},
eprinttype = {arxiv},
primaryclass = {stat},
abstract = {Hamiltonian Monte Carlo has proven a remarkable empirical success, but only recently have we begun to develop a rigorous understanding of why it performs so well on difficult problems and how it is best applied in practice. Unfortunately, that understanding is confined within the mathematics of differential geometry which has limited its dissemination, especially to the applied communities for which it is particularly important. In this review I provide a comprehensive conceptual account of these theoretical foundations, focusing on developing a principled intuition behind the method and its optimal implementations rather than any exhaustive rigor. Whether a practitioner or a statistician, the dedicated reader will acquire a solid grasp of how Hamiltonian Monte Carlo works, when it succeeds, and, perhaps most importantly, when it fails.},
archiveprefix = {arXiv},
keywords = {HMC,Statistics - Methodology},
file = {/Users/michelsen/Zotero/storage/7N3P7B7B/Betancourt - 2018 - A Conceptual Introduction to Hamiltonian Monte Car.pdf;/Users/michelsen/Zotero/storage/FUWHE69E/1701.html}
}
@article{bezansonJuliaFreshApproach2017,
title = {Julia: {{A}} Fresh Approach to Numerical Computing},
author = {Bezanson, Jeff and Edelman, Alan and Karpinski, Stefan and Shah, Viral B},
date = {2017},
journaltitle = {SIAM Review},
volume = {59},
number = {1},
pages = {65--98},
publisher = {{SIAM}},
url = {https://julialang.org/},
keywords = {julia}
}
@article{binghamPyroDeepUniversal2019,
title = {Pyro: {{Deep}} Universal Probabilistic Programming},
author = {Bingham, Eli and Chen, Jonathan P. and Jankowiak, Martin and Obermeyer, Fritz and Pradhan, Neeraj and Karaletsos, Theofanis and Singh, Rohit and Szerlip, Paul A. and Horsfall, Paul and Goodman, Noah D.},
date = {2019},
journaltitle = {Journal of Machine Learning Research},
shortjournal = {J. Mach. Learn. Res.},
volume = {20},
pages = {28:1--28:6},
url = {http://jmlr.org/papers/v20/18-403.html}
}
@article{borryPyDamageAutomatedAncient2021,
title = {{{PyDamage}}: Automated Ancient Damage Identification and Estimation for Contigs in Ancient {{DNA}} de Novo Assembly},
shorttitle = {{{PyDamage}}},
author = {Borry, Maxime and Hübner, Alexander and Rohrlach, Adam B. and Warinner, Christina},
date = {2021-07-27},
journaltitle = {PeerJ},
shortjournal = {PeerJ},
volume = {9},
pages = {e11845},
publisher = {{PeerJ Inc.}},
issn = {2167-8359},
doi = {10.7717/peerj.11845},
abstract = {DNA de novo assembly can be used to reconstruct longer stretches of DNA (contigs), including genes and even genomes, from short DNA sequencing reads. Applying this technique to metagenomic data derived from archaeological remains, such as paleofeces and dental calculus, we can investigate past microbiome functional diversity that may be absent or underrepresented in the modern microbiome gene catalogue. However, compared to modern samples, ancient samples are often burdened with environmental contamination, resulting in metagenomic datasets that represent mixtures of ancient and modern DNA. The ability to rapidly and reliably establish the authenticity and integrity of ancient samples is essential for ancient DNA studies, and the ability to distinguish between ancient and modern sequences is particularly important for ancient microbiome studies. Characteristic patterns of ancient DNA damage, namely DNA fragmentation and cytosine deamination (observed as C-to-T transitions) are typically used to authenticate ancient samples and sequences, but existing tools for inspecting and filtering aDNA damage either compute it at the read level, which leads to high data loss and lower quality when used in combination with de novo assembly, or require manual inspection, which is impractical for ancient assemblies that typically contain tens to hundreds of thousands of contigs. To address these challenges, we designed PyDamage, a robust, automated approach for aDNA damage estimation and authentication of de novo assembled aDNA. PyDamage uses a likelihood ratio based approach to discriminate between truly ancient contigs and contigs originating from modern contamination. We test PyDamage on both simulated aDNA data and archaeological paleofeces, and we demonstrate its ability to reliably and automatically identify contigs bearing DNA damage characteristic of aDNA. Coupled with aDNA de novo assembly, PyDamage opens up new doors to explore functional diversity in ancient metagenomic datasets.},
langid = {english},
file = {/Users/michelsen/Zotero/storage/PG43CP9L/Borry et al. - 2021 - PyDamage automated ancient damage identification .pdf}
}
@software{bradburyJAXComposableTransformations2018,
title = {{{JAX}}: Composable Transformations of {{Python NumPy}} Programs},
author = {Bradbury, James and Frostig, Roy and Hawkins, Peter and Johnson, Matthew James and Leary, Chris and Maclaurin, Dougal and Necula, George and Paszke, Adam and Vander{P}las, Jake and Wanderman-{M}ilne, Skye and Zhang, Qiao},
date = {2018},
url = {http://github.com/google/jax},
version = {0.2.5}
}
@article{briggsPatternsDamageGenomic2007,
title = {Patterns of Damage in Genomic {{DNA}} Sequences from a {{Neandertal}}},
author = {Briggs, Adrian W. and Stenzel, Udo and Johnson, Philip L. F. and Green, Richard E. and Kelso, Janet and Prüfer, Kay and Meyer, Matthias and Krause, Johannes and Ronan, Michael T. and Lachmann, Michael and Pääbo, Svante},
date = {2007-09-11},
journaltitle = {Proceedings of the National Academy of Sciences of the United States of America},
shortjournal = {Proc Natl Acad Sci U S A},
volume = {104},
number = {37},
eprint = {17715061},
eprinttype = {pmid},
pages = {14616--14621},
issn = {0027-8424},
doi = {10.1073/pnas.0704665104},
abstract = {High-throughput direct sequencing techniques have recently opened the possibility to sequence genomes from Pleistocene organisms. Here we analyze DNA sequences determined from a Neandertal, a mammoth, and a cave bear. We show that purines are overrepresented at positions adjacent to the breaks in the ancient DNA, suggesting that depurination has contributed to its degradation. We furthermore show that substitutions resulting from miscoding cytosine residues are vastly overrepresented in the DNA sequences and drastically clustered in the ends of the molecules, whereas other substitutions are rare. We present a model where the observed substitution patterns are used to estimate the rate of deamination of cytosine residues in single- and double-stranded portions of the DNA, the length of single-stranded ends, and the frequency of nicks. The results suggest that reliable genome sequences can be obtained from Pleistocene organisms.},
pmcid = {PMC1976210},
keywords = {adna,ancient dna,damage,damage pattern,mismatch},
file = {/Users/michelsen/Zotero/storage/8RS8X78U/Briggs et al. - 2007 - Patterns of damage in genomic DNA sequences from a.pdf}
}
@misc{brochuTutorialBayesianOptimization2010a,
title = {A {{Tutorial}} on {{Bayesian Optimization}} of {{Expensive Cost Functions}}, with {{Application}} to {{Active User Modeling}} and {{Hierarchical Reinforcement Learning}}},
author = {Brochu, Eric and Cora, Vlad M. and de Freitas, Nando},
options = {useprefix=true},
date = {2010-12-12},
number = {arXiv:1012.2599},
eprint = {1012.2599},
eprinttype = {arxiv},
primaryclass = {cs},
publisher = {{arXiv}},
doi = {10.48550/arXiv.1012.2599},
url = {http://arxiv.org/abs/1012.2599},
urldate = {2022-11-16},
abstract = {We present a tutorial on Bayesian optimization, a method of finding the maximum of expensive cost functions. Bayesian optimization employs the Bayesian technique of setting a prior over the objective function and combining it with evidence to get a posterior function. This permits a utility-based selection of the next observation to make on the objective function, which must take into account both exploration (sampling from areas of high uncertainty) and exploitation (sampling areas likely to offer improvement over the current best observation). We also present two detailed extensions of Bayesian optimization, with experiments---active user modelling with preferences, and hierarchical reinforcement learning---and a discussion of the pros and cons of Bayesian optimization based on our experiences.},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning,G.1.6,G.3,I.2.6},
file = {/Users/michelsen/Zotero/storage/TFYS4CFH/Brochu et al. - 2010 - A Tutorial on Bayesian Optimization of Expensive C.pdf;/Users/michelsen/Zotero/storage/AIQ2PYTE/1012.html}
}
@article{carpenterStanProbabilisticProgramming2017,
title = {Stan: {{A}} Probabilistic Programming Language},
author = {Carpenter, Bob and Gelman, Andrew and Hoffman, Matthew D and Lee, Daniel and Goodrich, Ben and Betancourt, Michael and Brubaker, Marcus and Guo, Jiqiang and Li, Peter and Riddell, Allen},
date = {2017},
journaltitle = {Journal of Statistical Software},
volume = {76},
number = {1},
}
@article{cepeda-cuervoDoubleGeneralizedBetaBinomial2017,
title = {Double {{Generalized Beta-Binomial}} and {{Negative Binomial Regression Models}}},
author = {Cepeda-Cuervo, Edilberto and Cifuentes-Amado, María Victoria},
date = {2017-01},
journaltitle = {Revista Colombiana de Estadística},
volume = {40},
number = {1},
pages = {141--163},
publisher = {{Universidad Nacional de Colombia.}},
issn = {0120-1751},
doi = {10.15446/rce.v40n1.61779},
langid = {english},
file = {/Users/michelsen/Zotero/storage/HRDDWJPK/Cepeda-Cuervo and Cifuentes-Amado - 2017 - Double Generalized Beta-Binomial and Negative Bino.pdf;/Users/michelsen/Zotero/storage/4ZKDB6Y5/scielo.html}
}
@article{dabneyAncientDNADamage2013,
title = {Ancient {{DNA Damage}}},
author = {Dabney, Jesse and Meyer, Matthias and Pääbo, Svante},
date = {2013-07},
journaltitle = {Cold Spring Harbor Perspectives in Biology},
shortjournal = {Cold Spring Harb Perspect Biol},
volume = {5},
number = {7},
eprint = {23729639},
eprinttype = {pmid},
pages = {a012567},
issn = {1943-0264},
doi = {10.1101/cshperspect.a012567},
abstract = {Under favorable conditions DNA can survive for thousands of years in the remains of dead organisms. The DNA extracted from such remains is invariably degraded to a small average size by processes that at least partly involve depurination. It also contains large amounts of deaminated cytosine residues that are accumulated toward the ends of the molecules, as well as several other lesions that are less well characterized., DNA fragments from ancient specimens are short (40–500 bp) and contain lesions that block DNA polymerases and cause replication errors. Degradation involves depurination and cytosine deamination, but other processes may be involved.},
pmcid = {PMC3685887},
keywords = {mikkel},
file = {/Users/michelsen/Zotero/storage/K6IAFPED/Dabney et al. - 2013 - Ancient DNA Damage.pdf;/Users/michelsen/Zotero/storage/ZWXUHXEU/dabney2013.pdf.pdf}
}
@article{dabneyCompleteMitochondrialGenome2013,
title = {Complete Mitochondrial Genome Sequence of a {{Middle Pleistocene}} Cave Bear Reconstructed from Ultrashort {{DNA}} Fragments},
author = {Dabney, Jesse and Knapp, Michael and Glocke, Isabelle and Gansauge, Marie-Theres and Weihmann, Antje and Nickel, Birgit and Valdiosera, Cristina and García, Nuria and Pääbo, Svante and Arsuaga, Juan-Luis and Meyer, Matthias},
date = {2013-09-24},
journaltitle = {Proceedings of the National Academy of Sciences},
volume = {110},
number = {39},
pages = {15758--15763},
publisher = {{Proceedings of the National Academy of Sciences}},
doi = {10.1073/pnas.1314445110},
abstract = {Although an inverse relationship is expected in ancient DNA samples between the number of surviving DNA fragments and their length, ancient DNA sequencing libraries are strikingly deficient in molecules shorter than 40 bp. We find that a loss of short molecules can occur during DNA extraction and present an improved silica-based extraction protocol that enables their efficient retrieval. In combination with single-stranded DNA library preparation, this method enabled us to reconstruct the mitochondrial genome sequence from a Middle Pleistocene cave bear (Ursus deningeri) bone excavated at Sima de los Huesos in the Sierra de Atapuerca, Spain. Phylogenetic reconstructions indicate that the U. deningeri sequence forms an early diverging sister lineage to all Western European Late Pleistocene cave bears. Our results prove that authentic ancient DNA can be preserved for hundreds of thousands of years outside of permafrost. Moreover, the techniques presented enable the retrieval of phylogenetically informative sequences from samples in which virtually all DNA is diminished to fragments shorter than 50 bp.},
file = {/Users/michelsen/Zotero/storage/QIP3ZZVT/dabney2013.pdf.pdf;/Users/michelsen/Zotero/storage/UZ69KHPF/Dabney et al. - 2013 - Complete mitochondrial genome sequence of a Middle.pdf}
}
@article{daleyModelingGenomeCoverage2014,
title = {Modeling Genome Coverage in Single-Cell Sequencing},
author = {Daley, Timothy and Smith, Andrew D.},
date = {2014-11-15},
journaltitle = {Bioinformatics},
shortjournal = {Bioinformatics},
volume = {30},
number = {22},
eprint = {25107873},
eprinttype = {pmid},
pages = {3159--3165},
issn = {1367-4803},
doi = {10.1093/bioinformatics/btu540},
abstract = {Motivation: Single-cell DNA sequencing is necessary for examining genetic variation at the cellular level, which remains hidden in bulk sequencing experiments. But because they begin with such small amounts of starting material, the amount of information that is obtained from single-cell sequencing experiment is highly sensitive to the choice of protocol employed and variability in library preparation. In particular, the fraction of the genome represented in single-cell sequencing libraries exhibits extreme variability due to quantitative biases in amplification and loss of genetic material., Results: We propose a method to predict the genome coverage of a deep sequencing experiment using information from an initial shallow sequencing experiment mapped to a reference genome. The observed coverage statistics are used in a non-parametric empirical Bayes Poisson model to estimate the gain in coverage from deeper sequencing. This approach allows researchers to know statistical features of deep sequencing experiments without actually sequencing deeply, providing a basis for optimizing and comparing single-cell sequencing protocols or screening libraries., Availability and implementation: The method is available as part of the preseq software package. Source code is available at http://smithlabresearch.org/preseq., Contact: [email protected], Supplementary information: Supplementary material is available at Bioinformatics online.},
pmcid = {PMC4221128},
file = {/Users/michelsen/Zotero/storage/6ZK3XXI9/Daley and Smith - 2014 - Modeling genome coverage in single-cell sequencing.pdf}
}
@software{dembinskiScikithepIminuitV22021,
title = {scikit-hep/iminuit: v2.8.2},
shorttitle = {scikit-hep/iminuit},
author = {Dembinski, Hans and Piti Ongmongkolkul and Deil, Christoph and Hurtado, David Menéndez and Schreiner, Henry and Feickert, Matthew and Andrew and Burr, Chris and Watson, Jason and Rost, Fabian and Pearce, Alex and Geiger, Lukas and Wiedemann, Bernhard M. and Gohlke, Christoph and Gonzalo and Drotleff, Jonas and Eschle, Jonas and Neste, Ludwig and Gorelli, Marco Edward and Baak, Max and Zapata, Omar and Odidev},
date = {2021-08-15},
doi = {10.5281/ZENODO.3949207},
abstract = {See changelog on RTD},
organization = {{Zenodo}},
version = {v2.8.2},
keywords = {iminuit}
}
@article{dietzDirectedFactorGraph2022,
title = {Directed Factor Graph Notation for Generative Models},
author = {Dietz, Laura},
date = {2022-11-02},
abstract = {We introduce the directed factor graph notation, a visual language for specifying the generative process of a probabilistic model. In contrast to boiler plate diagrams, directed factor graphs provide more information about the generative process, allowing to judge the complexity of the model at a glance.}
}
@article{flagelUnreasonableEffectivenessConvolutional2018,
title = {The {{Unreasonable Effectiveness}} of {{Convolutional Neural Networks}} in {{Population Genetic Inference}}},
author = {Flagel, Lex and Brandvain, Yaniv J and Schrider, Daniel R},
date = {2018-11-27},
journaltitle = {bioRxiv},
doi = {10.1101/336073},
abstract = {Population-scale genomic datasets have given researchers incredible amounts of information from which to infer evolutionary histories. Concomitant with this flood of data, theoretical and methodological advances have sought to extract information from genomic sequences to infer demographic events such as population size changes and gene flow among closely related populations/species, construct recombination maps, and uncover loci underlying recent adaptation. To date most methods make use of only one or a few summaries of the input sequences and therefore ignore potentially useful information encoded in the data. The most sophisticated of these approaches involve likelihood calculations, which require theoretical advances for each new problem, and often focus on a single aspect of the data (e.g. only allele frequency information) in the interest of mathematical and computational tractability. Directly interrogating the entirety of the input sequence data in a likelihood-free manner would thus offer a fruitful alternative. Here we accomplish this by representing DNA sequence alignments as images and using a class of deep learning methods called convolutional neural networks (CNNs) to make population genetic inferences from these images. We apply CNNs to a number of evolutionary questions and find that they frequently match or exceed the accuracy of current methods. Importantly, we show that CNNs perform accurate evolutionary model selection and parameter estimation, even on problems that have not received detailed theoretical treatments. Thus, when applied to population genetic alignments, CNN are capable of outperforming expert-derived statistical methods, and offer a new path forward in cases where no likelihood approach exists.},
langid = {english},
file = {/Users/michelsen/Zotero/storage/74GZM445/Flagel et al. - 2018 - The Unreasonable Effectiveness of Convolutional Ne.pdf}
}
@book{gelmanBayesianDataAnalysis2015a,
title = {Bayesian {{Data Analysis}}},
author = {Gelman, Andrew and Carlin, John B. and Stern, Hal S. and Dunson, David B. and Vehtari, Aki and Rubin, Donald B.},
date = {2015-07-06},
edition = {3},
publisher = {{Chapman and Hall/CRC}},
location = {{New York}},
doi = {10.1201/b16018},
abstract = {Winner of the 2016 De Groot Prize from the International Society for Bayesian AnalysisNow in its third edition, this classic book is widely considered the leading text on Bayesian methods, lauded for its accessible, practical approach to analyzing data and solving research problems. Bayesian Data Analysis, Third Edition continues to take an applied},
isbn = {978-0-429-11307-9},
pagetotal = {675}
}
@article{gelmanUnderstandingPredictiveInformation2014,
title = {Understanding Predictive Information Criteria for {{Bayesian}} Models},
author = {Gelman, Andrew and Hwang, Jessica and Vehtari, Aki},
date = {2014-11},
journaltitle = {Statistics and Computing},
shortjournal = {Stat Comput},
volume = {24},
number = {6},
pages = {997--1016},
issn = {0960-3174, 1573-1375},
doi = {10.1007/s11222-013-9416-2},
abstract = {We review the Akaike, deviance, and Watanabe-Akaike information criteria from a Bayesian perspective, where the goal is to estimate expected out-of-sample-prediction error using a biascorrected adjustment of within-sample error. We focus on the choices involved in setting up these measures, and we compare them in three simple examples, one theoretical and two applied. The contribution of this review is to put all these information criteria into a Bayesian predictive context and to better understand, through small examples, how these methods can apply in practice.},
langid = {english},
keywords = {waic},
file = {/Users/michelsen/Zotero/storage/D23ANURG/Gelman et al. - 2014 - Understanding predictive information criteria for .pdf}
}
@article{gelmanUnderstandingPredictiveInformation2014a,
title = {Understanding Predictive Information Criteria for {{Bayesian}} Models},
author = {Gelman, Andrew and Hwang, Jessica and Vehtari, Aki},
date = {2014-11-01},
journaltitle = {Statistics and Computing},
shortjournal = {Stat Comput},
volume = {24},
number = {6},
pages = {997--1016},
issn = {1573-1375},
doi = {10.1007/s11222-013-9416-2},
url = {https://doi.org/10.1007/s11222-013-9416-2},
urldate = {2022-11-02},
abstract = {We review the Akaike, deviance, and Watanabe-Akaike information criteria from a Bayesian perspective, where the goal is to estimate expected out-of-sample-prediction error using a bias-corrected adjustment of within-sample error. We focus on the choices involved in setting up these measures, and we compare them in three simple examples, one theoretical and two applied. The contribution of this paper is to put all these information criteria into a Bayesian predictive context and to better understand, through small examples, how these methods can apply in practice.},
langid = {english},
keywords = {AIC,Bayes,Cross-validation,DIC,Prediction,WAIC},
file = {/Users/michelsen/Zotero/storage/YH794826/gelman2013.pdf.pdf}
}
@online{genomicsBriefHistoryNext2021,
title = {A Brief History of {{Next Generation Sequencing}} ({{NGS}})},
author = {Mobley, Immy},
date = {2021-07-26T11:46:18+00:00},
url = {https://frontlinegenomics.com/a-brief-history-of-next-generation-sequencing-ngs/},
urldate = {2022-11-22},
abstract = {It is now possible to sequence an entire genome in just one day, due to the advent of next generation sequencing (NGS).},
langid = {english},
organization = {{Front Line Genomics}},
file = {/Users/michelsen/Zotero/storage/H2JFWMDM/a-brief-history-of-next-generation-sequencing-ngs.html}
}
@inproceedings{geTuringLanguageFlexible2018,
title = {Turing: {{A Language}} for {{Flexible Probabilistic Inference}}},
shorttitle = {Turing},
booktitle = {Proceedings of the {{Twenty-First International Conference}} on {{Artificial Intelligence}} and {{Statistics}}},
author = {Ge, Hong and Xu, Kai and Ghahramani, Zoubin},
date = {2018-03-31},
pages = {1682--1690},
publisher = {{PMLR}},
issn = {2640-3498},
url = {https://proceedings.mlr.press/v84/ge18b.html},
urldate = {2022-11-16},
abstract = {Probabilistic programming promises to simplify and democratize probabilistic machine learning, but successful probabilistic programming systems require flexible, generic and efficient inference engines. In this work, we present a system called Turing for building MCMC algorithms for probabilistic programming inference. Turing has a very simple syntax and makes full use of the numerical capabilities in the Julia programming language, including all implemented probability distributions, and automatic differentiation. Turing supports a wide range of popular Monte Carlo algorithms, including Hamiltonian Monte Carlo (HMC), HMC with No-U-Turns (NUTS), Gibbs sampling, sequential Monte Carlo (SMC), and several particle MCMC (PMCMC) samplers. Most importantly, Turing inference is composable: it combines MCMC operations on subsets of variables, for example using a combination of an HMC engine and a particle Gibbs (PG) engine. We explore several combinations of inference methods with the aim of finding approaches that are both efficient and universal, i.e. applicable to arbitrary probabilistic models. NUTS—a popular variant of HMC that adapts Hamiltonian simulation path length automatically, although quite powerful for exploring differentiable target distributions, is however not universal. We identify some failure modes for the NUTS engine, and demonstrate that composition of PG (for discrete variables) and NUTS (for continuous variables) can be useful when the NUTS engine is either not applicable, or simply does not work well. Our aim is to present Turing and its composable inference engines to the world and encourage other researchers to build on this system to help advance the field of probabilistic machine learning.},
eventtitle = {International {{Conference}} on {{Artificial Intelligence}} and {{Statistics}}},
langid = {english},
file = {/Users/michelsen/Zotero/storage/GC5T4QAA/Ge et al. - 2018 - Turing A Language for Flexible Probabilistic Infe.pdf}
}
@article{gilbertAssessingAncientDNA2005,
title = {Assessing Ancient {{DNA}} Studies},
author = {Gilbert, M. Thomas P. and Bandelt, Hans-Jürgen and Hofreiter, Michael and Barnes, Ian},
date = {2005-10-01},
journaltitle = {Trends in Ecology \& Evolution},
shortjournal = {Trends in Ecology \& Evolution},
volume = {20},
number = {10},
pages = {541--544},
issn = {0169-5347},
doi = {10.1016/j.tree.2005.07.005},
url = {https://www.sciencedirect.com/science/article/pii/S0169534705002260},
urldate = {2022-11-14},
abstract = {The study of ancient DNA has the potential to make significant and unique contributions to ecology and evolution. However, the techniques used contain inherent problems, particularly with regards to the generation of authentic and useful data. The solution currently advocated to reduce contamination and artefactual results is to adopt criteria for authentication. Nevertheless, these criteria are not foolproof, and we believe that they have, in practice, replaced the use of thought and prudence when designing and executing ancient DNA studies. We argue here that researchers in this field must take a more cognitive and self-critical approach. Specifically, in place of checking criteria off lists, researchers must explain, in sufficient enough detail to dispel doubt, how the data were obtained, and why they should be believed to be authentic.},
langid = {english},
file = {/Users/michelsen/Zotero/storage/Q3SKQL5V/gilbert2005.pdf.pdf;/Users/michelsen/Zotero/storage/YX29XGBI/Gilbert et al. - 2005 - Assessing ancient DNA studies.pdf;/Users/michelsen/Zotero/storage/75LJBXEP/S0169534705002260.html}
}
@article{gillespieExactStochasticSimulation1977,
title = {Exact Stochastic Simulation of Coupled Chemical Reactions},
author = {Gillespie, Daniel T.},
date = {1977-12-01},
journaltitle = {The Journal of Physical Chemistry},
shortjournal = {J. Phys. Chem.},
volume = {81},
number = {25},
pages = {2340--2361},
publisher = {{American Chemical Society}},
issn = {0022-3654},
doi = {10.1021/j100540a008},
url = {https://doi.org/10.1021/j100540a008},
urldate = {2022-11-17},
file = {/Users/michelsen/Zotero/storage/IMY69NTV/gillespie1977.pdf.pdf;/Users/michelsen/Zotero/storage/UP4JKA3N/Gillespie - 1977 - Exact stochastic simulation of coupled chemical re.pdf;/Users/michelsen/Zotero/storage/ZD28CYZP/j100540a008.html}
}
@book{hastieElementsStatisticalLearning2016,
title = {The {{Elements}} of {{Statistical Learning}}},
author = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome},
date = {2016},
series = {Springer {{Series}} in {{Statistics}}},
publisher = {{Springer}},
location = {{New York, NY}},
doi = {10.1007/978-0-387-84858-7},
url = {http://link.springer.com/10.1007/978-0-387-84858-7},
urldate = {2022-11-16},
isbn = {978-0-387-84857-0},
keywords = {Averaging,Boosting,classification,clustering,data mining,machine learning,Projection pursuit,Random Forest,supervised learning,Support Vector Machine,unsupervised learning},
file = {/Users/michelsen/Zotero/storage/DPBAT5Q4/Hastie et al. - 2009 - The Elements of Statistical Learning.pdf}
}
@article{heltbergPhysicalObservablesDetermine2021,
title = {Physical Observables to Determine the Nature of Membrane-Less Cellular Sub-Compartments},
author = {Heltberg, Mathias L and Miné-Hattab, Judith and Taddei, Angela and Walczak, Aleksandra M and Mora, Thierry},
editor = {Seminara, Agnese and Faraldo-Gómez, José D and Ronceray, Pierre},
date = {2021-10-22},
journaltitle = {eLife},
volume = {10},
pages = {e69181},
publisher = {{eLife Sciences Publications, Ltd}},
issn = {2050-084X},
doi = {10.7554/eLife.69181},
url = {https://doi.org/10.7554/eLife.69181},
urldate = {2022-11-18},
abstract = {The spatial organization of complex biochemical reactions is essential for the regulation of cellular processes. Membrane-less structures called foci containing high concentrations of specific proteins have been reported in a variety of contexts, but the mechanism of their formation is not fully understood. Several competing mechanisms exist that are difficult to distinguish empirically, including liquid-liquid phase separation, and the trapping of molecules by multiple binding sites. Here, we propose a theoretical framework and outline observables to differentiate between these scenarios from single molecule tracking experiments. In the binding site model, we derive relations between the distribution of proteins, their diffusion properties, and their radial displacement. We predict that protein search times can be reduced for targets inside a liquid droplet, but not in an aggregate of slowly moving binding sites. We use our results to reject the multiple binding site model for Rad52 foci, and find a picture consistent with a liquid-liquid phase separation. These results are applicable to future experiments and suggest different biological roles for liquid droplet and binding site foci.},
keywords = {cellular foci,liquid droplet,liquid-liquid phase separation,membrane-less sub-compartments,polymer binding model},
file = {/Users/michelsen/Zotero/storage/XWUDN5L3/Heltberg et al. - 2021 - Physical observables to determine the nature of me.pdf}
}
@article{heltbergSpatialHeterogeneityAffects2022a,
title = {Spatial Heterogeneity Affects Predictions from Early-Curve Fitting of Pandemic Outbreaks: A Case Study Using Population Data from {{Denmark}}},
author = {Heltberg, Mathias Spliid and Michelsen, Christian and Martiny, Emil S. and Christensen, Lasse Engbo and Jensen, Mogens H. and Halasa, Tariq and Petersen, Troels C.},
date = {2022-09-14},
journaltitle = {Royal Society Open Science},
volume = {9},
number = {9},
publisher = {{The Royal Society Publishing}},
issn = {2054-5703},
doi = {10.1098/rsos.220018},
abstract = {The modelling of pandemics has become a critical aspect in modern society. Even though artificial intelligence can help the forecast, the implementation of ordinary differential equations which estimate the time development in the number of susceptible, (exposed), infected and recovered (SIR/SEIR) individuals is still important in order to understand the stage of the pandemic. These models are based on simplified assumptions which constitute approximations, but to what extent these are erroneous is not understood since many factors can affect the development. In this paper, we introduce an agent-based model including spatial clustering and heterogeneities in connectivity and infection strength. Based on Danish population data, we estimate how this impacts the early prediction of a pandemic and compare this to the long-term development. Our results show that early phase SEIR model predictions overestimate the peak number of infected and the equilibrium level by at least a factor of two. These results are robust to variations of parameters influencing connection distances and independent of the distribution of infection rates.},
langid = {english},
keywords = {agent-based modelling,COVID-19,fitting,pandemics,spatial heterogeneity}
}
@incollection{hethcoteThreeBasicEpidemiological1989,
title = {Three {{Basic Epidemiological Models}}},
booktitle = {Applied {{Mathematical Ecology}}},
author = {Hethcote, Herbert W.},
editor = {Levin, Simon A. and Hallam, Thomas G. and Gross, Louis J.},
date = {1989},
series = {Biomathematics},
pages = {119--144},
publisher = {{Springer}},
location = {{Berlin, Heidelberg}},
doi = {10.1007/978-3-642-61317-3_5},
url = {https://doi.org/10.1007/978-3-642-61317-3_5},
urldate = {2022-11-17},
abstract = {There are three basic types of deterministic models for infectious diseases which are spread by direct person-to-person contact in a population. Here these simplest models are formulated as initial value problems for systems of ordinary differential equations and are analysed mathematically. Theorems are stated regarding the asymptotic stability regions for the equilibrium points and phase plane portraits of solution paths are presented. Parameters are estimated for various diseases and are used to compare the vaccination levels necessary for herd immunity for these diseases. Although the three models presented are simple and their mathematical analyses are elementary, these models provide notation, concepts, intuition and foundation for considering more refined models. Some possible refinements are disease-related factors such as the infectious agent, mode of transmission, latent period, infectious period, susceptibility and resistance, but also social, cultural, Ecology by providing a sound intuitive understanding and complete proofs for the three most basic epidemiological models for microparasitic infections.},
isbn = {978-3-642-61317-3},
langid = {english},
keywords = {Endemic Equilibrium,Epidemic Model,Equilibrium Point,Herd Immunity,Initial Value Problem}
}
@article{homanNoUturnSamplerAdaptively2014,
title = {The {{No-U-turn}} Sampler: Adaptively Setting Path Lengths in {{Hamiltonian Monte Carlo}}},
shorttitle = {The {{No-U-turn}} Sampler},
author = {Hoffman, Matthew D. and Gelman, Andrew},
date = {2014-01-01},
journaltitle = {The Journal of Machine Learning Research},
shortjournal = {J. Mach. Learn. Res.},
volume = {15},
number = {1},
pages = {1593--1623},
issn = {1532-4435},
abstract = {Hamiltonian Monte Carlo (HMC) is a Markov chain Monte Carlo (MCMC) algorithm that avoids the random walk behavior and sensitivity to correlated parameters that plague many MCMC methods by taking a series of steps informed by first-order gradient information. These features allow it to converge to high-dimensional target distributions much more quickly than simpler methods such as random walk Metropolis or Gibbs sampling. However, HMC's performance is highly sensitive to two user-specified parameters: a step size ε and a desired number of steps L. In particular, if L is too small then the algorithm exhibits undesirable random walk behavior, while if L is too large the algorithm wastes computation. We introduce the No-U-Turn Sampler (NUTS), an extension to HMC that eliminates the need to set a number of steps L. NUTS uses a recursive algorithm to build a set of likely candidate points that spans a wide swath of the target distribution, stopping automatically when it starts to double back and retrace its steps. Empirically, NUTS performs at least as efficiently as (and sometimes more efficiently than) a well tuned standard HMC method, without requiring user intervention or costly tuning runs. We also derive a method for adapting the step size parameter ε on the fly based on primal-dual averaging. NUTS can thus be used with no hand-tuning at all, making it suitable for applications such as BUGS-style automatic inference engines that require efficient "turnkey" samplers.},
keywords = {adaptive Monte Carlo,Bayesian inference,dual averaging,Hamiltonian Monte Carlo,Markov chain Monte Carlo},
file = {/Users/michelsen/Zotero/storage/FN4ZBJNS/Homan and Gelman - 2014 - The No-U-turn sampler adaptively setting path len.pdf}
}
@article{jonssonMapDamage2FastApproximate2013,
title = {{{mapDamage2}}.0: Fast Approximate {{Bayesian}} Estimates of Ancient {{DNA}} Damage Parameters},
shorttitle = {{{mapDamage2}}.0},
author = {Jónsson, Hákon and Ginolhac, Aurélien and Schubert, Mikkel and Johnson, Philip L. F. and Orlando, Ludovic},
date = {2013-07},
journaltitle = {Bioinformatics},
volume = {29},
number = {13},
pages = {1682--1684},
issn = {1367-4803, 1460-2059},
doi = {10.1093/bioinformatics/btt193},
abstract = {Motivation: Ancient DNA (aDNA) molecules in fossilized bones and teeth, coprolites, sediments, mummified specimens and museum collections represent fantastic sources of information for evolutionary biologists, revealing the agents of past epidemics and the dynamics of past populations. However, the analysis of aDNA generally faces two major issues. Firstly, sequences consist of a mixture of endogenous and various exogenous backgrounds, mostly microbial. Secondly, high nucleotide misincorporation rates can be observed as a result of severe post-mortem DNA damage. Such misincorporation patterns are instrumental to authenticate ancient sequences versus modern contaminants. We recently developed the user-friendly mapDamage package that identifies such patterns from next-generation sequencing (NGS) sequence datasets. The absence of formal statistical modeling of the DNA damage process, however, precluded rigorous quantitative comparisons across samples.},
langid = {english},
keywords = {mapDamage},
file = {/Users/michelsen/Zotero/storage/ADBPC7SX/Jónsson et al. - 2013 - mapDamage2.0 fast approximate Bayesian estimates .pdf}
}
@article{kermackContributionMathematicalTheory1927,
title = {A Contribution to the Mathematical Theory of Epidemics},
author = {Kermack, William Ogilvy and McKendrick, A. G.},
date = {1927-08},
journaltitle = {Proceedings of the Royal Society of London. Series A, Containing Papers of a Mathematical and Physical Character},
volume = {115},
number = {772},
pages = {700--721},
publisher = {{Royal Society}},
doi = {10.1098/rspa.1927.0118},
url = {https://royalsocietypublishing.org/doi/10.1098/rspa.1927.0118},
urldate = {2022-11-17},
abstract = {(1) One of the most striking features in the study of epidemics is the difficulty of finding a causal factor which appears to be adequate to account for the magnitude of the frequent epidemics of disease which visit almost every population. It was with a view to obtaining more insight regarding the effects of the various factors which govern the spread of contagious epidemics that the present investigation was undertaken. Reference may here be made to the work of Ross and Hudson (1915-17) in which the same problem is attacked. The problem is here carried to a further stage, and it is considered from a point of view which is in one sense more general. The problem may be summarised as follows: One (or more) infected person is introduced into a community of individuals, more or less susceptible to the disease in question. The disease spreads from the affected to the unaffected by contact infection. Each infected person runs through the course of his sickness, and finally is removed from the number of those who are sick, by recovery or by death. The chances of recovery or death vary from day to day during the course of his illness. The chances that the affected may convey infection to the unaffected are likewise dependent upon the stage of the sickness. As the epidemic spreads, the number of unaffected members of the community becomes reduced. Since the course of an epidemic is short compared with the life of an individual, the population may be considered as remaining constant, except in as far as it is modified by deaths due to the epidemic disease itself. In the course of time the epidemic may come to an end. One of the most important problems in epidemiology is to ascertain whether this termination occurs only when no susceptible individuals are left, or whether the interplay of the various factors of infectivity, recovery and mortality, may result in termination, whilst many susceptible individuals are still present in the unaffected population. It is difficult to treat this problem in its most general aspect. In the present communication discussion will be limited to the case in which all members of the community are initially equally susceptible to the disease, and it will be further assumed that complete immunity is conferred by a single infection.},
file = {/Users/michelsen/Zotero/storage/D8W6ATJM/kermack1927.pdf.pdf;/Users/michelsen/Zotero/storage/FRPQY5W8/Kermack et al. - 1927 - A contribution to the mathematical theory of epide.pdf}
}
@unpublished{killoranGeneratingDesigningDNA2017,
title = {Generating and Designing {{DNA}} with Deep Generative Models},
author = {Killoran, Nathan and Lee, Leo J. and Delong, Andrew and Duvenaud, David and Frey, Brendan J.},
date = {2017-12-17},
eprint = {1712.06148},
eprinttype = {arxiv},
primaryclass = {cs, q-bio, stat},
url = {http://arxiv.org/abs/1712.06148},
urldate = {2019-10-01},
abstract = {We propose generative neural network methods to generate DNA sequences and tune them to have desired properties. We present three approaches: creating synthetic DNA sequences using a generative adversarial network; a DNA-based variant of the activation maximization ("deep dream") design method; and a joint procedure which combines these two approaches together. We show that these tools capture important structures of the data and, when applied to designing probes for protein binding microarrays, allow us to generate new sequences whose properties are estimated to be superior to those found in the training data. We believe that these results open the door for applying deep generative models to advance genomics research.},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning,dna,GAN,Quantitative Biology - Genomics,Statistics - Machine Learning},
file = {/Users/michelsen/Zotero/storage/H96ZJIXN/Killoran et al. - 2017 - Generating and designing DNA with deep generative .pdf;/Users/michelsen/Zotero/storage/4ICGP6CQ/1712.html}
}
@article{korneliussenANGSDAnalysisNext2014,
title = {{{ANGSD}}: {{Analysis}} of {{Next Generation Sequencing Data}}},
shorttitle = {{{ANGSD}}},
author = {Korneliussen, Thorfinn Sand and Albrechtsen, Anders and Nielsen, Rasmus},
date = {2014-11-25},
journaltitle = {BMC Bioinformatics},
shortjournal = {BMC Bioinformatics},
volume = {15},
number = {1},
pages = {356},
issn = {1471-2105},
doi = {10.1186/s12859-014-0356-4},
abstract = {High-throughput DNA sequencing technologies are generating vast amounts of data. Fast, flexible and memory efficient implementations are needed in order to facilitate analyses of thousands of samples simultaneously.},
file = {/Users/michelsen/Zotero/storage/RVY8WNI4/Korneliussen et al. - 2014 - ANGSD Analysis of Next Generation Sequencing Data.pdf;/Users/michelsen/Zotero/storage/4QCYKUQB/s12859-014-0356-4.html}
}
@article{krauseCompleteMitochondrialDNA2010,
title = {The Complete Mitochondrial {{DNA}} Genome of an Unknown Hominin from Southern {{Siberia}}},
author = {Krause, Johannes and Fu, Qiaomei and Good, Jeffrey M. and Viola, Bence and Shunkov, Michael V. and Derevianko, Anatoli P. and Pääbo, Svante},
date = {2010-04},
journaltitle = {Nature},
volume = {464},
number = {7290},
pages = {894--897},
publisher = {{Nature Publishing Group}},
issn = {1476-4687},
doi = {10.1038/nature08976},
abstract = {Ancient mitochondrial DNA from a hominin individual who lived in the Altai Mountains in Southern Siberia between 48,000 and 30,000 years ago has been sequenced (http://go.nature.com/sokd1F for News story). Comparative genomics suggest that this mtDNA derives from an out-of-Africa migration distinct from those that gave rise to Neanderthals and modern humans. The stratigraphy of the Denisova Cave where the bone — part of the fifth 'little finger' digit — was excavated in 2008, suggests that this hominin lived close geographically to Neanderthals and modern humans, and at the same time. Taken with the presence of Homo floresiensis in Indonesia about 17,000 years ago, this discovery suggests that multiple late Pleistocene hominin lineages coexisted for long periods of time in Eurasia.},
issue = {7290},
langid = {english},
keywords = {Anthropology,Genomics,Mitochondrial genome},
file = {/Users/michelsen/Zotero/storage/7CA6P58N/Krause et al. - 2010 - The complete mitochondrial DNA genome of an unknow.pdf;/Users/michelsen/Zotero/storage/NF3GMGWB/krause2010.pdf.pdf;/Users/michelsen/Zotero/storage/N2L4XIDG/nature08976.html}
}
@article{krogerAnalyticalSolutionSIRmodel2020,
title = {Analytical Solution of the {{SIR-model}} for the Temporal Evolution of Epidemics. {{Part A}}: Time-Independent Reproduction Factor},
shorttitle = {Analytical Solution of the {{SIR-model}} for the Temporal Evolution of Epidemics. {{Part A}}},
author = {Kröger, M. and Schlickeiser, R.},
date = {2020-11-18},
journaltitle = {Journal of Physics A: Mathematical and Theoretical},
shortjournal = {J. Phys. A: Math. Theor.},
volume = {53},
number = {50},
pages = {505601},
issn = {1751-8113, 1751-8121},
doi = {10.1088/1751-8121/abc65d},
url = {https://iopscience.iop.org/article/10.1088/1751-8121/abc65d},
urldate = {2022-11-17},
abstract = {We revisit the susceptible-infectious-recovered/removed (SIR) model which is one of the simplest compartmental models. Many epidemiological models are derivatives of this basic form. While an analytic solution to the SIR model is known in parametric form for the case of a time-independent infection rate, we derive an analytic solution for the more general case of a time-dependent infection rate, that is not limited to a certain range of parameter values. Our approach allows us to derive several exact analytic results characterizing all quantities, and moreover explicit, non-parametric, and accurate analytic approximants for the solution of the SIR model for time-independent infection rates. We relate all parameters of the SIR model to a measurable, usually reported quantity, namely the cumulated number of infected population and its first and second derivatives at an initial time t = 0, where data is assumed to be available. We address the question of how well the differential rate of infections is captured by the Gauss model (GM). To this end we calculate the peak height, width, and position of the bell-shaped rate analytically. We find that the SIR is captured by the GM within a range of times, which we discuss in detail. We prove that the SIR model exhibits an asymptotic behavior at large times that is different from the logistic model, while the difference between the two models still decreases with increasing reproduction factor. This part A of our work treats the original SIR model to hold at all times, while this assumption will be relaxed in part B. Relaxing this assumption allows us to formulate initial conditions incompatible with the original SIR model.},
langid = {english}
}
@inproceedings{lamNumbaLLVMbasedPython2015,
title = {Numba: A {{LLVM-based Python JIT}} Compiler},
shorttitle = {Numba},
booktitle = {Proceedings of the {{Second Workshop}} on the {{LLVM Compiler Infrastructure}} in {{HPC}}},
author = {Lam, Siu Kwan and Pitrou, Antoine and Seibert, Stanley},
date = {2015-11-15},
series = {{{LLVM}} '15},
pages = {1--6},
publisher = {{Association for Computing Machinery}},
location = {{New York, NY, USA}},
doi = {10.1145/2833157.2833162},
url = {https://github.com/numba/numba},
abstract = {Dynamic, interpreted languages, like Python, are attractive for domain-experts and scientists experimenting with new ideas. However, the performance of the interpreter is often a barrier when scaling to larger data sets. This paper presents a just-in-time compiler for Python that focuses on scientific and array-oriented computing. Starting with the simple syntax of Python, Numba compiles a subset of the language into efficient machine code that is comparable in performance to a traditional compiled language. In addition, we share our experience in building a JIT compiler using LLVM[1].},
isbn = {978-1-4503-4005-2},
keywords = {compiler,LLVM,numba,Python},
file = {/Users/michelsen/Zotero/storage/UDC9TQRX/Lam et al. - 2015 - Numba a LLVM-based Python JIT compiler.pdf}
}
@article{liInferenceHumanPopulation2011,
title = {Inference of Human Population History from Individual Whole-Genome Sequences},
author = {Li, Heng and Durbin, Richard},
date = {2011-07},
journaltitle = {Nature},
volume = {475},
number = {7357},
pages = {493--496},
issn = {1476-4687},
doi = {10.1038/nature10231},
abstract = {The history of human population size is important for understanding human evolution. Various studies1,2,3,4,5 have found evidence for a founder event (bottleneck) in East Asian and European populations, associated with the human dispersal out-of-Africa event around 60 thousand years (kyr) ago. However, these studies have had to assume simplified demographic models with few parameters, and they do not provide a precise date for the start and stop times of the bottleneck. Here, with fewer assumptions on population size changes, we present a more detailed history of human population sizes between approximately ten thousand and a million years ago, using the pairwise sequentially Markovian coalescent model applied to the complete diploid genome sequences of a Chinese male (YH)6, a Korean male (SJK)7, three European individuals (J. C. Venter8, NA12891 and NA12878 (ref. 9)) and two Yoruba males (NA18507 (ref. 10) and NA19239). We infer that European and Chinese populations had very similar population-size histories before 10–20 kyr ago. Both populations experienced a severe bottleneck 10–60 kyr ago, whereas African populations experienced a milder bottleneck from which they recovered earlier. All three populations have an elevated effective population size between 60 and 250 kyr ago, possibly due to population substructure11. We also infer that the differentiation of genetically modern humans may have started as early as 100–120 kyr ago12, but considerable genetic exchanges may still have occurred until 20–40 kyr ago.},
langid = {english},
file = {/Users/michelsen/Zotero/storage/F6P22Y3N/Li and Durbin - 2011 - Inference of human population history from individ.pdf;/Users/michelsen/Zotero/storage/LLCR4JBW/nature10231.html}
}
@article{lundbergExplainableMachinelearningPredictions2018a,
title = {Explainable Machine-Learning Predictions for the Prevention of Hypoxaemia during Surgery},
author = {Lundberg, Scott M. and Nair, Bala and Vavilala, Monica S. and Horibe, Mayumi and Eisses, Michael J. and Adams, Trevor and Liston, David E. and Low, Daniel King-Wai and Newman, Shu-Fang and Kim, Jerry and Lee, Su-In},
date = {2018-10},
journaltitle = {Nature Biomedical Engineering},
shortjournal = {Nat Biomed Eng},
volume = {2},
number = {10},
pages = {749--760},
publisher = {{Nature Publishing Group}},
issn = {2157-846X},
doi = {10.1038/s41551-018-0304-0},
url = {https://www.nature.com/articles/s41551-018-0304-0},
urldate = {2022-11-17},
abstract = {Although anaesthesiologists strive to avoid hypoxaemia during surgery, reliably predicting future intraoperative hypoxaemia is not possible at present. Here, we report the development and testing of a machine-learning-based system that predicts the risk of hypoxaemia and provides explanations of the risk factors in real time during general anaesthesia. The system, which was trained on minute-by-minute data from the electronic medical records of over 50,000 surgeries, improved the performance of anaesthesiologists by providing interpretable hypoxaemia risks and contributing factors. The explanations for the predictions are broadly consistent with the literature and with prior knowledge from anaesthesiologists. Our results suggest that if anaesthesiologists currently anticipate 15\% of hypoxaemia events, with the assistance of this system they could anticipate 30\%, a large portion of which may benefit from early intervention because they are associated with modifiable factors. The system can help improve the clinical understanding of hypoxaemia risk during anaesthesia care by providing general insights into the exact changes in risk induced by certain characteristics of the patient or procedure.},
issue = {10},
langid = {english},
keywords = {Computational science,Health care},
file = {/Users/michelsen/Zotero/storage/LTLWX2YG/lundberg2018.pdf.pdf;/Users/michelsen/Zotero/storage/YN3RSEG4/Lundberg et al. - 2018 - Explainable machine-learning predictions for the p.pdf;/Users/michelsen/Zotero/storage/NVHGE7HG/s41551-018-0304-0.html}
}
@article{lundbergLocalExplanationsGlobal2020,
title = {From Local Explanations to Global Understanding with Explainable {{AI}} for Trees},
author = {Lundberg, Scott M. and Erion, Gabriel and Chen, Hugh and DeGrave, Alex and Prutkin, Jordan M. and Nair, Bala and Katz, Ronit and Himmelfarb, Jonathan and Bansal, Nisha and Lee, Su-In},
date = {2020-01},
journaltitle = {Nature Machine Intelligence},
shortjournal = {Nat Mach Intell},
volume = {2},
number = {1},
pages = {56--67},
publisher = {{Nature Publishing Group}},
issn = {2522-5839},
doi = {10.1038/s42256-019-0138-9},
url = {https://www.nature.com/articles/s42256-019-0138-9},
urldate = {2022-11-17},
abstract = {Tree-based machine learning models such as random forests, decision trees and gradient boosted trees are popular nonlinear predictive models, yet comparatively little attention has been paid to explaining their predictions. Here we improve the interpretability of tree-based models through three main contributions. (1) A polynomial time algorithm to compute optimal explanations based on game theory. (2) A new type of explanation that directly measures local feature interaction effects. (3) A new set of tools for understanding global model structure based on combining many local explanations of each prediction. We apply these tools to three medical machine learning problems and show how combining many high-quality local explanations allows us to represent global structure while retaining local faithfulness to the original model. These tools enable us to (1) identify high-magnitude but low-frequency nonlinear mortality risk factors in the US population, (2) highlight distinct population subgroups with shared risk characteristics, (3) identify nonlinear interaction effects among risk factors for chronic kidney disease and (4) monitor a machine learning model deployed in a hospital by identifying which features are degrading the model’s performance over time. Given the popularity of tree-based machine learning models, these improvements to their interpretability have implications across a broad set of domains.},
issue = {1},
langid = {english},
keywords = {Computer science,Medical research,Software},
file = {/Users/michelsen/Zotero/storage/G3S6ZF2S/lundberg2020.pdf.pdf;/Users/michelsen/Zotero/storage/P9BX6BD4/Lundberg et al. - 2020 - From local explanations to global understanding wi.pdf}
}
@inproceedings{lundbergUnifiedApproachInterpreting2017,
title = {A {{Unified Approach}} to {{Interpreting Model Predictions}}},
booktitle = {Advances in {{Neural Information Processing Systems}}},
author = {Lundberg, Scott M and Lee, Su-In},
date = {2017},
volume = {30},
publisher = {{Curran Associates, Inc.}},
url = {https://proceedings.neurips.cc/paper/2017/hash/8a20a8621978632d76c43dfd28b67767-Abstract.html},
urldate = {2022-11-17},
abstract = {Understanding why a model makes a certain prediction can be as crucial as the prediction's accuracy in many applications. However, the highest accuracy for large modern datasets is often achieved by complex models that even experts struggle to interpret, such as ensemble or deep learning models, creating a tension between accuracy and interpretability. In response, various methods have recently been proposed to help users interpret the predictions of complex models, but it is often unclear how these methods are related and when one method is preferable over another. To address this problem, we present a unified framework for interpreting predictions, SHAP (SHapley Additive exPlanations). SHAP assigns each feature an importance value for a particular prediction. Its novel components include: (1) the identification of a new class of additive feature importance measures, and (2) theoretical results showing there is a unique solution in this class with a set of desirable properties. The new class unifies six existing methods, notable because several recent methods in the class lack the proposed desirable properties. Based on insights from this unification, we present new methods that show improved computational performance and/or better consistency with human intuition than previous approaches.},
file = {/Users/michelsen/Zotero/storage/DXD675E2/Lundberg and Lee - 2017 - A Unified Approach to Interpreting Model Predictio.pdf}
}
@article{makComparativePerformanceBGISEQ5002017,
title = {Comparative Performance of the {{BGISEQ-500}} vs {{Illumina HiSeq2500}} Sequencing Platforms for Palaeogenomic Sequencing},
author = {Mak, Sarah Siu Tze and Gopalakrishnan, Shyam and Carøe, Christian and Geng, Chunyu and Liu, Shanlin and Sinding, Mikkel-Holger S and Kuderna, Lukas F K and Zhang, Wenwei and Fu, Shujin and Vieira, Filipe G and Germonpré, Mietje and Bocherens, Hervé and Fedorov, Sergey and Petersen, Bent and Sicheritz-Pontén, Thomas and Marques-Bonet, Tomas and Zhang, Guojie and Jiang, Hui and Gilbert, M Thomas P},
date = {2017-06-26},
journaltitle = {GigaScience},
shortjournal = {Gigascience},
volume = {6},
number = {8},
eprint = {28854615},
eprinttype = {pmid},
pages = {1--13},
issn = {2047-217X},
doi = {10.1093/gigascience/gix049},
abstract = {Ancient DNA research has been revolutionized following development of next-generation sequencing platforms. Although a number of such platforms have been applied to ancient DNA samples, the Illumina series are the dominant choice today, mainly because of high production capacities and short read production. Recently a potentially attractive alternative platform for palaeogenomic data generation has been developed, the BGISEQ-500, whose sequence output are comparable with the Illumina series. In this study, we modified the standard BGISEQ-500 library preparation specifically for use on degraded DNA, then directly compared the sequencing performance and data quality of the BGISEQ-500 to the Illumina HiSeq2500 platform on DNA extracted from 8 historic and ancient dog and wolf samples. The data generated were largely comparable between sequencing platforms, with no statistically significant difference observed for parameters including level (P = 0.371) and average sequence length (P = 0.718) of endogenous nuclear DNA, sequence GC content (P = 0.311), double-stranded DNA damage rate (P = 0.309), and sequence clonality (P = 0.093). Small significant differences were found in single-strand DNA damage rate (δS; slightly lower for the BGISEQ-500, P = 0.011) and the background rate of difference from the reference genome (θ; slightly higher for BGISEQ-500, P = 0.012). This may result from the differences in amplification cycles used to polymerase chain reaction–amplify the libraries. A significant difference was also observed in the mitochondrial DNA percentages recovered (P = 0.018), although we believe this is likely a stochastic effect relating to the extremely low levels of mitochondria that were sequenced from 3 of the samples with overall very low levels of endogenous DNA. Although we acknowledge that our analyses were limited to animal material, our observations suggest that the BGISEQ-500 holds the potential to represent a valid and potentially valuable alternative platform for palaeogenomic data generation that is worthy of future exploration by those interested in the sequencing and analysis of degraded DNA.},
pmcid = {PMC5570000},
file = {/Users/michelsen/Zotero/storage/X3XQ9ILX/Mak et al. - 2017 - Comparative performance of the BGISEQ-500 vs Illum.pdf}
}
@article{manleyHighdensityMappingSinglemolecule2008,
title = {High-Density Mapping of Single-Molecule Trajectories with Photoactivated Localization Microscopy},
author = {Manley, Suliana and Gillette, Jennifer M. and Patterson, George H. and Shroff, Hari and Hess, Harald F. and Betzig, Eric and Lippincott-Schwartz, Jennifer},
date = {2008-02},
journaltitle = {Nature Methods},
shortjournal = {Nat Methods},
volume = {5},
number = {2},
pages = {155--157},
publisher = {{Nature Publishing Group}},
issn = {1548-7105},
doi = {10.1038/nmeth.1176},
url = {https://www.nature.com/articles/nmeth.1176},
urldate = {2022-11-18},
abstract = {We combined photoactivated localization microscopy (PALM) with live-cell single-particle tracking to create a new method termed sptPALM. We created spatially resolved maps of single-molecule motions by imaging the membrane proteins Gag and VSVG, and obtained several orders of magnitude more trajectories per cell than traditional single-particle tracking enables. By probing distinct subsets of molecules, sptPALM can provide insight into the origins of spatial and temporal heterogeneities in membranes.},
issue = {2},
langid = {english},
keywords = {Bioinformatics,Biological Microscopy,Biological Techniques,Biomedical Engineering/Biotechnology,general,Life Sciences,Proteomics},
file = {/Users/michelsen/Zotero/storage/C97CFIWV/manley2008.pdf.pdf;/Users/michelsen/Zotero/storage/EJSILYJH/Manley et al. - 2008 - High-density mapping of single-molecule trajectori.pdf;/Users/michelsen/Zotero/storage/B47CW9RP/nmeth.html}
}
@article{martinianoRemovingReferenceBias2020,
title = {Removing Reference Bias and Improving Indel Calling in Ancient {{DNA}} Data Analysis by Mapping to a Sequence Variation Graph},
author = {Martiniano, Rui and Garrison, Erik and Jones, Eppie R. and Manica, Andrea and Durbin, Richard},
date = {2020-09-17},
journaltitle = {Genome Biology},
shortjournal = {Genome Biology},
volume = {21},
number = {1},
pages = {250},
issn = {1474-760X},
doi = {10.1186/s13059-020-02160-7},
abstract = {During the last decade, the analysis of ancient DNA (aDNA) sequence has become a powerful tool for the study of past human populations. However, the degraded nature of aDNA means that aDNA molecules are short and frequently mutated by post-mortem chemical modifications. These features decrease read mapping accuracy and increase reference bias, in which reads containing non-reference alleles are less likely to be mapped than those containing reference alleles. Alternative approaches have been developed to replace the linear reference with a variation graph which includes known alternative variants at each genetic locus. Here, we evaluate the use of variation graph software vg to avoid reference bias for aDNA and compare with existing methods.},
keywords = {Ancient DNA,Reference bias,Sequence alignment,Variation graph},
file = {/Users/michelsen/Zotero/storage/H4XE7YZQ/Martiniano et al. - 2020 - Removing reference bias and improving indel callin.pdf;/Users/michelsen/Zotero/storage/KAVVBHIH/[email protected];/Users/michelsen/Zotero/storage/I8MAVB8Z/s13059-020-02160-7.html}
}
@book{mcelreathStatisticalRethinkingBayesian2020,
title = {Statistical Rethinking: A {{Bayesian}} Course with Examples in {{R}} and {{Stan}}},
shorttitle = {Statistical Rethinking},
author = {McElreath, Richard},
date = {2020},
series = {{{CRC}} Texts in Statistical Science},
edition = {2},
publisher = {{Taylor and Francis, CRC Press}},
location = {{Boca Raton}},
abstract = {"Statistical Rethinking: A Bayesian Course with Examples in R and Stan, Second Edition builds knowledge/confidence in statistical modeling. Pushes readers to perform step-by-step calculations (usually automated.) Unique, computational approach ensures readers understand details to make reasonable choices and interpretations in their modeling work"--},
isbn = {978-0-367-13991-9}
}
@article{mckennaGenomeAnalysisToolkit2010,
title = {The {{Genome Analysis Toolkit}}: A {{MapReduce}} Framework for Analyzing Next-Generation {{DNA}} Sequencing Data},
shorttitle = {The {{Genome Analysis Toolkit}}},
author = {McKenna, Aaron and Hanna, Matthew and Banks, Eric and Sivachenko, Andrey and Cibulskis, Kristian and Kernytsky, Andrew and Garimella, Kiran and Altshuler, David and Gabriel, Stacey and Daly, Mark and DePristo, Mark A.},
date = {2010-09},
journaltitle = {Genome Research},
shortjournal = {Genome Res},
volume = {20},
number = {9},
eprint = {20644199},
eprinttype = {pmid},
pages = {1297--1303},
issn = {1549-5469},
doi = {10.1101/gr.107524.110},
abstract = {Next-generation DNA sequencing (NGS) projects, such as the 1000 Genomes Project, are already revolutionizing our understanding of genetic variation among individuals. However, the massive data sets generated by NGS--the 1000 Genome pilot alone includes nearly five terabases--make writing feature-rich, efficient, and robust analysis tools difficult for even computationally sophisticated individuals. Indeed, many professionals are limited in the scope and the ease with which they can answer scientific questions by the complexity of accessing and manipulating the data produced by these machines. Here, we discuss our Genome Analysis Toolkit (GATK), a structured programming framework designed to ease the development of efficient and robust analysis tools for next-generation DNA sequencers using the functional programming philosophy of MapReduce. The GATK provides a small but rich set of data access patterns that encompass the majority of analysis tool needs. Separating specific analysis calculations from common data management infrastructure enables us to optimize the GATK framework for correctness, stability, and CPU and memory efficiency and to enable distributed and shared memory parallelization. We highlight the capabilities of the GATK by describing the implementation and application of robust, scale-tolerant tools like coverage calculators and single nucleotide polymorphism (SNP) calling. We conclude that the GATK programming framework enables developers and analysts to quickly and easily write efficient and robust NGS tools, many of which have already been incorporated into large-scale sequencing projects like the 1000 Genomes Project and The Cancer Genome Atlas.},
langid = {english},
pmcid = {PMC2928508},
keywords = {Base Sequence,Genome,Genomics,Sequence Analysis; DNA,Software},
file = {/Users/michelsen/Zotero/storage/JMQWTD9P/McKenna et al. - 2010 - The Genome Analysis Toolkit a MapReduce framework.pdf;/Users/michelsen/Zotero/storage/ZGZA63AH/mckenna2010.pdf.pdf}
}
@book{mendelgregorVersucheUberPflanzenhybriden1866,
title = {Versuche Über Pflanzen-Hybriden},
author = {Mendel, Gregor},
date = {1866},
pages = {464},
publisher = {{Im Verlage des Vereines}},
location = {{Brünn}},
url = {https://www.biodiversitylibrary.org/item/124139},
copyright = {NOT\_IN\_COPYRIGHT},
keywords = {Mendel's law,Plant hybridization}
}
@article{meyerNuclearDNASequences2016a,
title = {Nuclear {{DNA}} Sequences from the {{Middle Pleistocene}} {{Sima de los Huesos}} Hominins},
author = {Meyer, Matthias and Arsuaga, Juan-Luis and de Filippo, Cesare and Nagel, Sarah and Aximu-Petri, Ayinuer and Nickel, Birgit and Martínez, Ignacio and Gracia, Ana and de Castro, José María Bermúdez and Carbonell, Eudald and Viola, Bence and Kelso, Janet and Prüfer, Kay and Pääbo, Svante},
options = {useprefix=true},
date = {2016-03},
journaltitle = {Nature},
volume = {531},
number = {7595},
pages = {504--507},
issn = {1476-4687},
doi = {10.1038/nature17405},
abstract = {A unique assemblage of 28 hominin individuals, found in Sima de los Huesos in the Sierra de Atapuerca in Spain, has recently been dated to approximately 430,000 years ago1. An interesting question is how these Middle Pleistocene hominins were related to those who lived in the Late Pleistocene epoch, in particular to Neanderthals in western Eurasia and to Denisovans, a sister group of Neanderthals so far known only from southern Siberia. While the Sima de los Huesos hominins share some derived morphological features with Neanderthals, the mitochondrial genome retrieved from one individual from Sima de los Huesos is more closely related to the mitochondrial DNA of Denisovans than to that of Neanderthals2. However, since the mitochondrial DNA does not reveal the full picture of relationships among populations, we have investigated DNA preservation in several individuals found at Sima de los Huesos. Here we recover nuclear DNA sequences from two specimens, which show that the Sima de los Huesos hominins were related to Neanderthals rather than to Denisovans, indicating that the population divergence between Neanderthals and Denisovans predates 430,000 years ago. A mitochondrial DNA recovered from one of the specimens shares the previously described relationship to Denisovan mitochondrial DNAs, suggesting, among other possibilities, that the mitochondrial DNA gene pool of Neanderthals turned over later in their history.},
langid = {english},
file = {/Users/michelsen/Zotero/storage/4J452WUH/Meyer et al. - 2016 - Nuclear DNA sequences from the Middle Pleistocene .pdf;/Users/michelsen/Zotero/storage/8STA35B9/nature17405.html}
}
@thesis{michelsenPhysicistApproachMachine2020,
title = {A Physicist’s Approach to Machine Learning – Understanding the Basic Bricks},
author = {Michelsen, Christian},
date = {2020},
institution = {{University of Copenhagen}}
}
@article{mullisSpecificEnzymaticAmplification1986,
title = {Specific Enzymatic Amplification of {{DNA}} in Vitro: The Polymerase Chain Reaction},
shorttitle = {Specific Enzymatic Amplification of {{DNA}} in Vitro},
author = {Mullis, K. and Faloona, F. and Scharf, S. and Saiki, R. and Horn, G. and Erlich, H.},
date = {1986},
journaltitle = {Cold Spring Harbor Symposia on Quantitative Biology},
shortjournal = {Cold Spring Harb Symp Quant Biol},
volume = {51 Pt 1},
eprint = {3472723},
eprinttype = {pmid},
pages = {263--273},
issn = {0091-7451},
doi = {10.1101/sqb.1986.051.01.032},
langid = {english},
keywords = {Alleles,Base Sequence,Cell Line,Cloning; Molecular,DNA,DNA-Directed DNA Polymerase,Genes,Humans,Templates; Genetic},
file = {/Users/michelsen/Zotero/storage/6557LKBM/mullis1986.pdf.pdf}
}
@book{murphyMachineLearningProbabilistic2012,
title = {Machine Learning: {{A}} Probabilistic Perspective},
author = {Murphy, Kevin P.},
date = {2012},
publisher = {{The MIT Press}},
abstract = {Today's Web-enabled deluge of electronic data calls for automated methods of data analysis. Machine learning provides these, developing methods that can automatically detect patterns in data and then use the uncovered patterns to predict future data. This textbook offers a comprehensive and self-contained introduction to the field of machine learning, based on a unified, probabilistic approach. The coverage combines breadth and depth, offering necessary background material on such topics as probability, optimization, and linear algebra as well as discussion of recent developments in the field, including conditional random fields, L1 regularization, and deep learning. The book is written in an informal, accessible style, complete with pseudo-code for the most important algorithms. All topics are copiously illustrated with color images and worked examples drawn from such application domains as biology, text processing, computer vision, and robotics. Rather than providing a cookbook of different heuristic methods, the book stresses a principled model-based approach, often using the language of graphical models to specify models in a concise and intuitive way. Almost all the models described have been implemented in a MATLAB software package–PMTK (probabilistic modeling toolkit)–that is freely available online. The book is suitable for upper-level undergraduates with an introductory-level college math background and beginning graduate students.},
isbn = {0-262-01802-0}
}
@incollection{nealMCMCUsingHamiltonian2011,
title = {{{MCMC}} Using {{Hamiltonian}} Dynamics},
booktitle = {Handbook of {{Markov Chain Monte Carlo}}},
author = {Neal, Radford M.},
date = {2011-05-10},
publisher = {{Routledge Handbooks Online}},
doi = {10.1201/b10905-7},
url = {https://www.routledgehandbooks.com/doi/10.1201/b10905-7},
urldate = {2022-11-16},
abstract = {Since their popularization in the 1990s, Markov chain Monte Carlo (MCMC) methods have revolutionized statistical computing and have had an especially profound impact on the practice of Bayesian statistics. Furthermore, MCMC methods have enabled the development and use of intricate models in an astonishing array of disciplines as diverse as fisheries science and economics. The wide-ranging practical importance of MCMC has sparked an expansive and deep investigation into fundamental Markov chain theory. The Handbook of Markov Chain Monte Carlo provides a reference for the broad audience of developers and users of MCMC methodology interested in keeping up with cutting-edge theory and applications. The first half of the book covers MCMC foundations, methodology, and algorithms. The second half considers the use of MCMC in a variety of practical applications including in educational research, astrophysics, brain imaging, ecology, and sociology. The in-depth introductory section of the book allows graduate students and practicing scientists new to MCMC to become thoroughly acquainted with the basic theory, algorithms, and applications. The book supplies detailed examples and case studies of realistic scientific problems presenting the diversity of methods used by the wide-ranging MCMC community. Those familiar with MCMC methods will find this book a useful refresher of current theory and recent developments.},
isbn = {978-1-4200-7941-8 978-1-4200-7942-5},
langid = {english}
}
@unpublished{ngDna2vecConsistentVector2017,
title = {{{dna2vec}}: {{Consistent}} Vector Representations of Variable-Length k-Mers},
shorttitle = {{{dna2vec}}},
author = {Ng, Patrick},
date = {2017-01-23},
eprint = {1701.06279},
eprinttype = {arxiv},
primaryclass = {cs, q-bio, stat},
url = {http://arxiv.org/abs/1701.06279},
urldate = {2019-10-01},
abstract = {One of the ubiquitous representation of long DNA sequence is dividing it into shorter k-mer components. Unfortunately, the straightforward vector encoding of k-mer as a one-hot vector is vulnerable to the curse of dimensionality. Worse yet, the distance between any pair of one-hot vectors is equidistant. This is particularly problematic when applying the latest machine learning algorithms to solve problems in biological sequence analysis. In this paper, we propose a novel method to train distributed representations of variable-length k-mers. Our method is based on the popular word embedding model word2vec, which is trained on a shallow two-layer neural network. Our experiments provide evidence that the summing of dna2vec vectors is akin to nucleotides concatenation. We also demonstrate that there is correlation between Needleman-Wunsch similarity score and cosine similarity of dna2vec vectors.},
archiveprefix = {arXiv},
keywords = {Computer Science - Computation and Language,Computer Science - Machine Learning,dna,dna2vec,embedding,Quantitative Biology - Quantitative Methods,Statistics - Machine Learning},
file = {/Users/michelsen/Zotero/storage/YZQUSFFJ/Ng - 2017 - dna2vec Consistent vector representations of vari.pdf;/Users/michelsen/Zotero/storage/CHA7Y2FC/1701.html}
}
@article{nielsenGenotypeSNPCalling2011,
title = {Genotype and {{SNP}} Calling from Next-Generation Sequencing Data},
author = {Nielsen, Rasmus and Paul, Joshua S. and Albrechtsen, Anders and Song, Yun S.},
date = {2011-06},
journaltitle = {Nature reviews. Genetics},
shortjournal = {Nat Rev Genet},
volume = {12},
number = {6},
eprint = {21587300},
eprinttype = {pmid},
pages = {443--451},
issn = {1471-0056},
doi = {10.1038/nrg2986},
abstract = {Meaningful analysis of next-generation sequencing (NGS) data, which are produced extensively by genetics and genomics studies, relies crucially on the accurate calling of SNPs and genotypes. Recently developed statistical methods both improve and quantify the considerable uncertainty associated with genotype calling, and will especially benefit the growing number of studies using low- to medium-coverage data. We review these methods and provide a guide for their use in NGS studies.},
pmcid = {PMC3593722},
file = {/Users/michelsen/Zotero/storage/BL7KEMGY/nielsen2011.pdf.pdf;/Users/michelsen/Zotero/storage/Q9LNHW3V/Nielsen et al. - 2011 - Genotype and SNP calling from next-generation sequ.pdf}
}
@article{nielsenTracingPeoplingWorld2017,
title = {Tracing the Peopling of the World through Genomics},
author = {Nielsen, Rasmus and Akey, Joshua M. and Jakobsson, Mattias and Pritchard, Jonathan K. and Tishkoff, Sarah and Willerslev, Eske},
date = {2017-01},
journaltitle = {Nature},
volume = {541},
number = {7637},
pages = {302--310},
issn = {1476-4687},
doi = {10.1038/nature21347},
abstract = {Advances in the sequencing and the analysis of the genomes of both modern and ancient peoples have facilitated a number of breakthroughs in our understanding of human evolutionary history. These include the discovery of interbreeding between anatomically modern humans and extinct hominins; the development of an increasingly detailed description of the complex dispersal of modern humans out of Africa and their population expansion worldwide; and the characterization of many of the genetic adaptations of humans to local environmental conditions. Our interpretation of the evolutionary history and adaptation of humans is being transformed by analyses of these new genomic data.},
langid = {english},
file = {/Users/michelsen/Zotero/storage/MQ5Z6BM8/Nielsen et al. - 2017 - Tracing the peopling of the world through genomics.pdf;/Users/michelsen/Zotero/storage/K8I2HX53/nature21347.html}
}
@article{orlandoRecalibratingEquusEvolution2013,
title = {Recalibrating {{Equus}} Evolution Using the Genome Sequence of an Early {{Middle Pleistocene}} Horse},
author = {Orlando, Ludovic and Ginolhac, Aurélien and Zhang, Guojie and Froese, Duane and Albrechtsen, Anders and Stiller, Mathias and Schubert, Mikkel and Cappellini, Enrico and Petersen, Bent and Moltke, Ida and Johnson, Philip L. F. and Fumagalli, Matteo and Vilstrup, Julia T. and Raghavan, Maanasa and Korneliussen, Thorfinn and Malaspinas, Anna-Sapfo and Vogt, Josef and Szklarczyk, Damian and Kelstrup, Christian D. and Vinther, Jakob and Dolocan, Andrei and Stenderup, Jesper and Velazquez, Amhed M. V. and Cahill, James and Rasmussen, Morten and Wang, Xiaoli and Min, Jiumeng and Zazula, Grant D. and Seguin-Orlando, Andaine and Mortensen, Cecilie and Magnussen, Kim and Thompson, John F. and Weinstock, Jacobo and Gregersen, Kristian and Røed, Knut H. and Eisenmann, Véra and Rubin, Carl J. and Miller, Donald C. and Antczak, Douglas F. and Bertelsen, Mads F. and Brunak, Søren and Al-Rasheid, Khaled A. S. and Ryder, Oliver and Andersson, Leif and Mundy, John and Krogh, Anders and Gilbert, M. Thomas P. and Kjær, Kurt and Sicheritz-Ponten, Thomas and Jensen, Lars Juhl and Olsen, Jesper V. and Hofreiter, Michael and Nielsen, Rasmus and Shapiro, Beth and Wang, Jun and Willerslev, Eske},
date = {2013-07},
journaltitle = {Nature},
volume = {499},
number = {7456},
pages = {74--78},
publisher = {{Nature Publishing Group}},
issn = {1476-4687},
doi = {10.1038/nature12323},
abstract = {A low-coverage draft genome sequence from a horse bone recovered from permafrost dated to approximately 560–780 thousand years ago is presented; this represents the oldest full genome sequence to date by almost an order of magnitude.},
issue = {7456},
langid = {english},
keywords = {Evolutionary genetics},
file = {/Users/michelsen/Zotero/storage/2WL4MPHI/orlando2013.pdf.pdf;/Users/michelsen/Zotero/storage/ITMZ8KJC/Orlando et al. - 2013 - Recalibrating Equus evolution using the genome seq.pdf;/Users/michelsen/Zotero/storage/RSGRY4SB/nature12323.html}
}
@article{oswaldImagingQuantificationTransmembrane2014,
title = {Imaging and Quantification of Trans-Membrane Protein Diffusion in Living Bacteria},
author = {Oswald, Felix and Bank, Ernst L. M. and Bollen, Yves J. M. and Peterman, Erwin J. G.},
date = {2014-06-05},
journaltitle = {Physical Chemistry Chemical Physics},
shortjournal = {Phys. Chem. Chem. Phys.},
volume = {16},
number = {25},
pages = {12625--12634},
publisher = {{The Royal Society of Chemistry}},
issn = {1463-9084},
doi = {10.1039/C4CP00299G},
url = {https://pubs.rsc.org/en/content/articlelanding/2014/cp/c4cp00299g},
urldate = {2022-11-18},
abstract = {The cytoplasmic membrane forms the barrier between any cell's interior and the outside world. It contains many proteins that enable essential processes such as the transmission of signals, the uptake of nutrients, and cell division. In the case of prokaryotes, which do not contain intracellular membranes, the cytoplasmic membrane also contains proteins for respiration and protein folding. Mutual interactions and specific localization of these proteins depend on two-dimensional diffusion driven by thermal fluctuations. The experimental investigation of membrane–protein diffusion in bacteria is challenging due to their small size, only a few times larger than the resolution of an optical microscope. Here, we review fluorescence microscopy-based methods to study diffusion of membrane proteins in living bacteria. The main focus is on data-analysis tools to extract diffusion coefficients from single-particle tracking data obtained by single-molecule fluorescence microscopy. We introduce a novel approach, IPODD (inverse projection of displacement distributions), to obtain diffusion coefficients from the usually obtained 2-D projected diffusion trajectories of the highly 3-D curved bacterial membrane. This method provides, in contrast to traditional mean-squared-displacement methods, correct diffusion coefficients and allows unravelling of heterogeneously diffusing populations.},
langid = {english},
file = {/Users/michelsen/Zotero/storage/4DQE6SPQ/Oswald et al. - 2014 - Imaging and quantification of trans-membrane prote.pdf;/Users/michelsen/Zotero/storage/F9N2ZSXW/Oswald et al. - 2014 - Imaging and quantification of trans-membrane prote.pdf;/Users/michelsen/Zotero/storage/IGUV532M/oswald2014.pdf.pdf;/Users/michelsen/Zotero/storage/7Y7X6BNC/c4cp00299g.html}
}
@article{paaboMolecularCloningAncient1985,
title = {Molecular Cloning of {{Ancient Egyptian}} Mummy {{DNA}}},
author = {Pääbo, Svante},
date = {1985-04},
journaltitle = {Nature},
volume = {314},
number = {6012},
pages = {644--645},
publisher = {{Nature Publishing Group}},
issn = {1476-4687},
doi = {10.1038/314644a0},
abstract = {Artificial mummification was practised in Egypt from ∼ 2600 BC until the fourth century AD. Because of the dry Egyptian climate, however, there are also many natural mummies preserved from earlier as well as later times. To elucidate whether this unique source of ancient human remains can be used for molecular genetic analyses, 23 mummies were investigated for DNA content. One 2,400-yr-old mummy of a child was found to contain DNA that could be molecularly cloned in a plasmid vector. I report here that one such clone contains two members of the Alu family of human repetitive DNA sequences, as detected by DNA hybridizations and nucleotide sequencing. These analyses show that substantial pieces of mummy DNA (3.4 kilobases) can be cloned and that the DNA fragments seem to contain little or no modifications introduced postmortem.},
issue = {6012},
langid = {english},
keywords = {Humanities and Social Sciences,multidisciplinary,Science},
file = {/Users/michelsen/Zotero/storage/6AUP92PZ/Pääbo - 1985 - Molecular cloning of Ancient Egyptian mummy DNA.pdf;/Users/michelsen/Zotero/storage/GF2JK7A7/[email protected];/Users/michelsen/Zotero/storage/6GKS8RPC/314644a0.html}
}